# Classification of cats and dogs.

Three statistical models including logistic regression, linear regression, and optimal separating hyperplanes were used as neural networks. The prediction accuracy of the validation dataset for the logistic regression binary, logistic regression multinormial, linear regression, and optimal separating hyperplanes are 0.5,  0.52, 0.56, 0.5, respectively. Compared to previous one single hidden layer model, logistic regression multinormial and linear regression model performs a little bit better. 



In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow.keras as keras
import os
import numpy as np
import matplotlib.pyplot as plt
import PIL as PIL

#_URL = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'

#path_to_zip = tf.keras.utils.get_file('/Users/jiaxingchen/CoursesWorkshops/PSU/2020Spring/deep_learning/homework2/cats_and_dogs.zip', origin=_URL, extract=True)

#PATH = os.path.join(os.path.dirname(path_to_zip), 'cats_and_dogs_filtered')

PATH = '/Users/jiaxingchen/CoursesWorkshops/PSU/2020Spring/deep_learning/homework2/cats_and_dogs_filtered'


In [3]:
##setting the dir of the datasets; 
train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

train_cats_dir = os.path.join(train_dir, 'cats')  # directory with our training cat pictures
train_dogs_dir = os.path.join(train_dir, 'dogs')  # directory with our training dog pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')  # directory with our validation cat pictures
validation_dogs_dir = os.path.join(validation_dir, 'dogs')  # directory with our validation dog pictures


In [4]:
#load images using ImageDataGenerator; 
batch_size = 1000
epochs = 15
IMG_HEIGHT = 150
IMG_WIDTH = 150
train_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our training data
validation_image_generator = ImageDataGenerator(rescale=1./255) # Generator for our validation data

train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='binary')
validation_data_gen = validation_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=validation_dir,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='binary')


Found 2000 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


In [6]:
## how did a simple model do here: 
import tensorflow.keras as keras
from keras import models
from keras import layers

sample_training_images, sample_training_labels = next(train_data_gen)
sample_validation_images, sample_validation_labels = next(validation_data_gen)
sample_training_images = sample_training_images.reshape((1000, 150 * 150 *3))
sample_training_images = sample_training_images.astype('float32') / 255
sample_validation_images = sample_validation_images.reshape((1000, 150 * 150 *3))
sample_validation_images = sample_validation_images.astype('float32') / 255
from keras.utils import to_categorical
sample_training_labels = to_categorical(sample_training_labels)
sample_validation_labels = to_categorical(sample_validation_labels)
print(sample_training_images.shape)
print(sample_training_labels.shape)


(1000, 67500)
(1000, 2)


In [13]:
# logistic regression (binary)  
import keras.backend as K
def loss_logit(y_actual,y_predicted):
    loss = K.log(1+K.exp(-(y_actual*y_predicted)))
    return(loss)

#fit the mdoel 
network = models.Sequential()
network.add(layers.Dense(512, input_shape=(150*150*3,)))
network.add(layers.Dense(1, activation='sigmoid'))
network.compile(optimizer='rmsprop',loss=loss_logit,metrics=['accuracy'])
network.fit(sample_training_images, sample_training_labels, epochs=10, batch_size=128)

test_loss, test_acc = network.evaluate(sample_validation_images, sample_validation_labels)
print(test_acc)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
0.5


In [27]:
# logistic regression (multinormial) 
#fit the mdoel 
network = models.Sequential()
network.add(layers.Dense(512, activation='linear', input_shape=(150*150*3,)))
network.add(layers.Dense(2, activation='sigmoid'))
network.compile(optimizer='rmsprop',loss='categorical_crossentropy',metrics=['accuracy'])
network.fit(sample_training_images, sample_training_labels, epochs=10, batch_size=128)

test_loss, test_acc = network.evaluate(sample_validation_images, sample_validation_labels)
print(test_acc)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
0.5210000276565552


In [22]:
# linear regression 
network = models.Sequential()
network.add(layers.Dense(2, activation='linear'))
network.compile(optimizer='rmsprop',loss='mean_squared_error',metrics=['accuracy'])
network.fit(sample_training_images, sample_training_labels, epochs=10, batch_size=128)

test_loss, test_acc = network.evaluate(sample_validation_images, sample_validation_labels)
print(test_acc)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
0.5649999976158142


In [26]:
# optimal separating hyperplanes
network = models.Sequential()
network.add(layers.Dense(2, activation='linear'))
network.compile(optimizer='adadelta',loss='hinge',metrics=['accuracy'])
network.fit(sample_training_images, sample_training_labels, epochs=10, batch_size=128)

test_loss, test_acc = network.evaluate(sample_validation_images, sample_validation_labels)
print(test_acc)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
0.5
