# Importing the Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator, load_img
from sklearn.model_selection import train_test_split

# Preparing the training dataset

In [2]:
# Build a (file_name, class) table from the training directory.
# The label is the text before the first dot of each file name.
# Files whose prefix is not a known label (hidden files, thumbnails, ...) are
# skipped: keeping them would leave 'file_name' and 'class' with different
# lengths and pd.DataFrame would raise.
# The names are sorted so that the row order, and therefore the seeded
# train/validation split made later, does not depend on the OS listing order.
valid_classes=('cat','dog')
training_set_filenames=sorted(
    file_name for file_name in os.listdir('./dataset/train')
    if file_name.split('.')[0] in valid_classes
)
classes=[file_name.split('.')[0] for file_name in training_set_filenames]
df=pd.DataFrame({
    'file_name':training_set_filenames,
    'class':classes
})

# Splitting the dataframe into training and validation dataframes

In [3]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split repeatable. Both halves get a fresh 0..n-1 index.
train_df,validation_df=(
    subset.reset_index(drop=True)
    for subset in train_test_split(df,test_size=0.2,random_state=40)
)
print(train_df.shape,validation_df.shape,sep='\n')

(20000, 2)
(5000, 2)


# Data Preprocessing

Preprocessing the training set

In [4]:
# ImageDataGenerator is a class, train_data_gen is an instance of it, and
# flow_from_dataframe is the method that turns a dataframe of file names into
# batches of images.
#
# rescale works like feature scaling: every pixel value is multiplied by 1/255
# so that the values lie between 0 and 1. The remaining arguments apply random
# augmentations (zoom, shear, rotation, shifts, horizontal flip) to the
# training images.
train_data_gen=ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# target_size: final size of every image; larger images take longer to train on.
# batch_size: number of images per batch, see
#   https://stats.stackexchange.com/questions/153531/what-is-batch-size-in-neural-network
# class_mode: with only two outcomes (cat or dog) this could be 'binary' or
#   'categorical'; with more than two classes it has to be 'categorical'.
training_set=train_data_gen.flow_from_dataframe(
    dataframe=train_df,
    directory="./dataset/train",
    x_col='file_name',
    y_col='class',
    target_size=(128,128),
    class_mode='categorical',
    batch_size=15,
)

Found 20000 validated image filenames belonging to 2 classes.


Preprocessing the validation set

In [5]:
#For validation set, we should not apply any transformations (because while predicting, we won't apply any transformations) but feature scaling should be applied to the pixels because during training, we have used feature scaling
# No augmentation for the validation images: at prediction time the inputs are
# not transformed either. Only the 1/255 rescaling is kept, because the
# training images were scaled the same way.
validation_data_gen=ImageDataGenerator(rescale=1./255)

validation_set=validation_data_gen.flow_from_dataframe(
    dataframe=validation_df,
    directory="./dataset/train",
    x_col='file_name',
    y_col='class',
    target_size=(128,128),
    class_mode='categorical',
    batch_size=15,
)

Found 5000 validated image filenames belonging to 2 classes.


# Building the CNN

Initialize the CNN

In [6]:
# Start from an empty Sequential model: a linear stack of layers that also
# provides the training and inference methods used below.
cnn=tf.keras.Sequential()

Adding the layers

In [7]:
# Three convolution blocks with 32, 64 and 128 feature detectors. Each block is
# Conv2D -> BatchNormalization -> MaxPool2D -> Dropout.
#   filters:     number of feature detectors
#   kernel_size: size of each feature detector
#   activation:  ReLU is used on every layer before the output layer
#   input_shape: only the first layer needs it; it must match the 128x128 RGB
#                images produced in the preprocessing step (use 128,128,1 for
#                black-and-white images)
for block_index,n_filters in enumerate((32,64,128)):
    first_layer_kwargs={'input_shape':[128,128,3]} if block_index==0 else {}
    cnn.add(tf.keras.layers.Conv2D(filters=n_filters, kernel_size=3, activation='relu', **first_layer_kwargs))
    cnn.add(tf.keras.layers.BatchNormalization())
    cnn.add(tf.keras.layers.MaxPool2D(pool_size=2))   # max pooling
    cnn.add(tf.keras.layers.Dropout(0.25))

# Flattening
cnn.add(tf.keras.layers.Flatten())

# Full connection
# units: number of hidden neurons in this fully connected layer
cnn.add(tf.keras.layers.Dense(units=512, activation='relu'))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.Dropout(0.5))

# Output layer
# Two units with softmax, one per class, matching class_mode='categorical'.
# With class_mode='binary' this would be a single unit with a sigmoid activation.
cnn.add(tf.keras.layers.Dense(units=2, activation='softmax'))

In [8]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts)
cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 126, 126, 32)      896       
_________________________________________________________________
batch_normalization (BatchNo (None, 126, 126, 32)      128       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 63, 63, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 61, 61, 64)        18496     
_________________________________________________________________
batch_normalization_1 (Batch (None, 61, 61, 64)        256       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 64)        0

# Training the CNN

Compiling the CNN

In [9]:
# optimizer: the algorithm that updates the model's weights in order to reduce
#   the loss. RMSprop is one of the optimizers shipped with TensorFlow.
# loss: the prediction error of the network. Its gradients are what the
#   optimizer uses to update the weights.
#   categorical_crossentropy fits the one-hot labels produced by
#   class_mode='categorical'; use binary_crossentropy when the labels are
#   binary and the output layer is a single sigmoid unit.
# metrics: accuracy is reported during training and validation.
cnn.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Defining Callbacks and Learning rate

In [10]:
# The callbacks come from tf.keras, the same package the model is built with,
# instead of the standalone `keras` package; `tf` is already imported at the
# top of the notebook, so no extra import is needed here.

# Stop when val_loss has not improved for 5 epochs and restore the best weights.
# The patience has to be smaller than the number of training epochs (10),
# otherwise the callback can never trigger. It is kept larger than the
# ReduceLROnPlateau patience so a lower learning rate gets a chance first.
earlystop=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Halve the learning rate when val_accuracy has not improved for 2 epochs,
# down to a minimum of 1e-5.
learning_rate_reduction=tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', patience=2, verbose=1, factor=0.5, min_lr=0.00001)

callbacks=[earlystop, learning_rate_reduction]

Training the CNN

In [11]:
# Train for up to 10 epochs.
# The number of steps is taken from the generators themselves: len() of a
# generator is the number of batches needed to cover all of its images,
# including the final partial batch. Hard-coding 20000//15 and 5000//15 would
# skip that last batch and go stale if the data or batch size changes.
# The returned History object is kept so the loss/accuracy curves can be
# inspected afterwards.
history=cnn.fit(
    x=training_set,
    validation_data=validation_set,
    epochs=10,
    steps_per_epoch=len(training_set),
    validation_steps=len(validation_set),
    callbacks=callbacks,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2b03a902b50>

Save the trained model (architecture and weights)

In [12]:
# Write the trained model to 'pet_classification.h5' in the working directory.
# Model.save stores the whole model, not only its weights.
cnn.save('pet_classification.h5')

# Prediction for Single Input

In [24]:
# Show which integer index the generator assigned to each class name;
# the prediction cell below relies on this mapping.
training_set.class_indices

{'cat': 0, 'dog': 1}

In [70]:
from PIL import Image

# Load the image and bring it into the format the network was trained on:
# 3 colour channels, 128x128 pixels, pixel values scaled to [0, 1].
# convert("RGB") keeps the channel count at 3 even for grayscale, palette or
# RGBA files; the `with` block closes the file once the pixels are read.
with Image.open("dataset/single_prediction/cat_or_dog_4.jpg") as img:
    im=img.convert("RGB").resize((128,128))

# Same feature scaling as during training
im=np.asarray(im,dtype="float32")/255.0

# The model expects a batch of images, so a single image has to be wrapped in
# a batch of size 1: expand_dims inserts the batch axis at position 0.
# https://numpy.org/doc/stable/reference/generated/numpy.expand_dims.html
im=np.expand_dims(im,axis=0)

# predict returns one probability per class for every image in the batch;
# argmax picks the most likely class. (predict_classes is deprecated and no
# longer available in newer TensorFlow releases.)
probabilities=cnn.predict(im)
pred=int(np.argmax(probabilities,axis=-1)[0])

# Index-to-name mapping as shown by training_set.class_indices: {'cat': 0, 'dog': 1}
if pred==0:
    print("It's a Cat!")
elif pred==1:
    print("It's a Dog!")

It's a Cat!
