## The image data for this program can be downloaded from 
https://www.kaggle.com/dhirensk/cats-vs-dogs-training8000test2000

## Training our Pet Classifier

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras

Using TensorFlow backend.


In [2]:
num_classes = 2
img_rows,img_cols = 64,64
batch_size = 100

In [3]:
train_data_dir = '/users/dipit/Documents/MLDL/CNN/Pet Classification/datasets/training_set'
validation_data_dir = '/users/dipit/Documents/MLDL/CNN/Pet Classification/datasets/test_set'
test_data_dir = '/users/dipit/Documents/MLDL/CNN/Pet Classification/datasets/test_pictures_pets'

In [5]:
from keras.preprocessing.image import ImageDataGenerator

train_data_gen = ImageDataGenerator(rescale = 1./255,
                                   rotation_range = 0.3,
                                   width_shift_range = 0.3,
                                   height_shift_range = 0.3,
                                   zoom_range = 0.3,
                                   horizontal_flip = True,
                                   vertical_flip = True,
                                   fill_mode = 'nearest')

In [6]:
train_generator = train_data_gen.flow_from_directory(directory = train_data_dir,
                                                    target_size = (img_rows,img_cols),
                                                    batch_size = batch_size,
                                                    class_mode = 'categorical',
                                                    shuffle = True)

Found 8000 images belonging to 2 classes.


In [7]:
validation_data_gen = ImageDataGenerator(rescale = 1./255)
validation_generator = validation_data_gen.flow_from_directory(directory = validation_data_dir,
                                                              target_size = (img_rows,img_cols),
                                                              batch_size = batch_size,
                                                              class_mode = 'categorical')

Found 2000 images belonging to 2 classes.


In [8]:
test_data_gen = ImageDataGenerator(rescale = 1./255)
test_generator = test_data_gen.flow_from_directory(directory=test_data_dir,
                                                  target_size = (img_rows,img_cols),
                                                  batch_size = batch_size,
                                                  class_mode = 'categorical')

Found 20 images belonging to 2 classes.


## Defining the Model

In [9]:
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Flatten,Dense,BatchNormalization

def convnet():
    model = Sequential([
    Conv2D(32,(3,3),padding = 'same',activation = 'relu',input_shape = (img_rows,img_cols,3)),
    Conv2D(32,(3,3),activation = 'relu',padding = 'same'),
    MaxPooling2D(pool_size=(2,2),strides = 2),
    BatchNormalization(),
        
        
    Conv2D(64,(3,3),activation = 'relu',padding = 'same'),
    Conv2D(64,(3,3),activation = 'relu',padding='same'),
    MaxPooling2D(pool_size=(2,2),strides = 2),
    BatchNormalization(),
        
    Conv2D(64,(3,3),activation = 'relu',padding = 'same'),
    Conv2D(64,(3,3),activation = 'relu',padding='same'),
    MaxPooling2D(pool_size=(2,2),strides = 2),
    BatchNormalization(),
        
    Conv2D(128,(3,3),activation = 'relu',padding = 'same'),
    Conv2D(128,(3,3),activation = 'relu',padding='same'),
    MaxPooling2D(pool_size=(2,2),strides = 2),
    BatchNormalization(),
        
    Flatten(),
    Dense(128,activation = 'relu'),
    BatchNormalization(),
    Dense(2,activation = 'sigmoid'),        
    ])
    return model


model = convnet()
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 64, 64, 32)        896       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 32)        9248      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 32)        128       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 32, 32, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 64)       

In [10]:
from keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau
#checkpoint = ModelCheckpoint("pet_classifier_improved_model.h5",
                            #monitor = 'val_accuracy',
                            #mode = 'min',
                            #save_best_only = True,
                            #verbose = 1)

earlystop = EarlyStopping(monitor = 'val_accuracy',
                         min_delta = 0,
                         patience = 5,
                         verbose =1,
                         restore_best_weights = True)
reduce_lr = ReduceLROnPlateau(monitor = 'val_accuracy',
                             factor = 0.2,
                             patience = 3,
                             verbose = 1,
                             min_delta = 0.0001)
callbacks = [earlystop,reduce_lr]

In [11]:
optimizer = keras.optimizers.Adam(lr = 0.001)
model.compile(optimizer = optimizer,
             loss = 'categorical_crossentropy',
             metrics = ['accuracy'])

In [12]:
num_train_samples = 8000
num_val_samples = 2000

In [13]:
history = model.fit_generator(train_generator,
                              epochs = 20,
                              #steps_per_epoch = (num_train_samples/batch_size),
                              validation_data = validation_generator,
                              #validation_steps = (num_val_samples/batch_size),
                              callbacks = callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20

Epoch 00014: ReduceLROnPlateau reducing learning rate to 4.0000001899898055e-05.
Epoch 15/20
Epoch 16/20
Restoring model weights from the end of the best epoch
Epoch 00016: early stopping


In [14]:
scores = model.evaluate_generator(validation_generator,#steps = num_val_samples // batch_size,
                                 verbose = 1)
print("\n Test Result: %3.f loss:%3.f" %(scores[1]*100 , scores[0]))


 Test Result:  79 loss:  1


In [15]:
model.save('pet_classifier_improved_model.h5')

In [16]:
new_model = keras.models.load_model('pet_classifier_improved_model.h5')

In [17]:
y_pred = new_model.predict(validation_generator, steps = num_val_samples // batch_size)

In [18]:
y_pred_label = np.argmax(y_pred, axis=1)

In [19]:
y_pred_label

array([1, 1, 1, ..., 0, 0, 1])

## Confusion Matrix

In [20]:
from sklearn.metrics import confusion_matrix,classification_report
print('Confusion Matrix')
print(confusion_matrix(validation_generator.classes,y_pred_label))
print('Classification Report')
class_labels = validation_generator.class_indices
class_labels = {v: k for k, v in class_labels.items()}
classes = list(class_labels.values())
print(classification_report(validation_generator.classes,y_pred_label,target_names = classes))

Confusion Matrix
[[450 550]
 [466 534]]
Classification Report
              precision    recall  f1-score   support

        cats       0.49      0.45      0.47      1000
        dogs       0.49      0.53      0.51      1000

    accuracy                           0.49      2000
   macro avg       0.49      0.49      0.49      2000
weighted avg       0.49      0.49      0.49      2000



In [21]:
from keras.preprocessing import image
import os
import cv2
from os import listdir
from os.path import isfile , join
import re

def draw_test(name,pred,im,true_label):
    BLACK = [0,0,0]
    expanded_image = cv2.copyMakeBorder(im,160,0,0,500,cv2.BORDER_CONSTANT,value =BLACK)
    cv2.putText(expanded_image,"predicted - "+pred,(20,60),cv2.FONT_HERSHEY_SIMPLEX,1,(0,0,225),2)
    cv2.putText(expanded_image,"true - "+true_label,(20,120),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),2)
    cv2.imshow(name,expanded_image)
    
def getRandomImage(path,img_width,img_height):
    folders = list(filter(lambda x:os.path.isdir(os.path.join(path,x)),os.listdir(path)))
    random_directory = np.random.randint(0,len(folders))
    path_class = folders[random_directory]
    file_path = path + '/' + path_class
    file_names = [f for f in listdir(file_path) if isfile(join(file_path,f))]
    random_file_index = np.random.randint(0,len(file_names))
    image_name = file_names[random_file_index]
    final_path = file_path + "/" + image_name
    return image.load_img(final_path,target_size = (img_width,img_height)),final_path,path_class

img_width,img_height = 64,64

files =[]
predictions =[]
true_labels = []

for i in range(0,10):
    #path = '/users/dipit/Documents/MLDL/CNN/fruits-360/test-multiple_fruits'
    path = '/users/dipit/Documents/MLDL/CNN/Pet Classification/datasets/test_pictures_pets'
    img,final_path,true_label = getRandomImage(path,img_width,img_height)
    files.append(final_path)
    true_labels.append(true_label)
    x = image.img_to_array(img)
    x = x * 1./255
    x = np.expand_dims(x,axis = 0)
    images = np.vstack([x])
    classes = model.predict_classes(images,batch_size = 10)
    predictions.append(classes)
    
for i in range(0,len(files)):
    image = cv2.imread((files[i]))
    draw_test("Prediction",class_labels[predictions[i][0]],image,true_labels[i])
    cv2.waitKey(0)
    
cv2.destroyAllWindows()