In [16]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

In [17]:
# We apply augmentation only to the images in the training set, not to the test set (the test images are only rescaled). The reason we augment the training dataset is to avoid overfitting,
# or else our model will perform very well during training but fail badly during testing. This technique is a standard way to reduce overfitting in computer vision.
# We apply different random transformations such as rotating, shearing, zooming in, zooming out, flipping, etc. This is called image augmentation and is done to stop the CNN from over-learning the training images.

In [18]:
# Generator for the training images: feature scaling plus random augmentation.
# `rescale` plays the same role as a MinMaxScaler / feature-normalisation step:
# every pixel value is multiplied by 1/255 so that it lies between 0 and 1,
# which is very important for any neural network.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,       # random shearing
    zoom_range=0.2,        # random zoom in / zoom out
    horizontal_flip=True,  # random horizontal flips
)

In [19]:
# The dataset folder keeps the cat and dog images in separate sub-directories.
# flow_from_directory detects those sub-directories automatically, treats each
# one as a class and labels them 0 and 1; the mapping can be inspected with
# "training_set.class_indices". Because every image comes with a label, CNN
# classification is a supervised learning problem.
# The classes can also be listed explicitly through the "classes" argument.
# https://faroit.com/keras-docs/1.2.0/preprocessing/image/#imagedatagenerator
training_set = train_datagen.flow_from_directory(
    directory='training_set',
    target_size=(64, 64),  # every image is resized to 64x64
    batch_size=32,
    class_mode='binary',   # one 0/1 label per image
)

Found 8000 images belonging to 2 classes.


In [20]:
# The test images get no augmentation at all; they are only feature-scaled
# (pixel values multiplied by 1/255), which is still required so that they
# are in the same range as the training images.

test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [21]:
# Load the test images with the same target size, batch size and label mode
# as the training images.
test_set = test_datagen.flow_from_directory(
    directory='test_set',
    target_size=(64, 64),
    batch_size=32,
    class_mode='binary',
)

Found 2000 images belonging to 2 classes.


In [22]:
# Start from an empty sequential model; the layers are added one at a time below.
cnn = tf.keras.Sequential()

In [23]:
# First convolutional layer.
# - filters: the number of feature detectors (can be tuned during hyper-parameter tuning).
# - kernel_size: the shape of each filter matrix; 3 means a 3x3 matrix.
# - activation: relu.
# - input_shape: the size of the input images. The generators resize every image
#   to 64x64; the trailing 3 means RGB (use 1 instead for black-and-white images).
cnn.add(
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=3,
        activation="relu",
        input_shape=[64, 64, 3],
    )
)

In [24]:
# Max pooling after the first convolution.
# - pool_size: the shape of the pooling window; 2 means a 2x2 matrix.
# - strides: 2 means the window moves 2 pixels at each step.
cnn.add(
    tf.keras.layers.MaxPooling2D(
        pool_size=2,
        strides=2,
    )
)

In [25]:
# Second convolution + pooling stage. input_shape is left out this time because
# it is only needed on the first (input) layer.

cnn.add(
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=3,
        activation="relu",
    )
)
cnn.add(
    tf.keras.layers.MaxPooling2D(
        pool_size=2,
        strides=2,
    )
)

In [26]:
# Flatten the output of the convolution/pooling layers above so it can be fed to the dense layers.
cnn.add(tf.keras.layers.Flatten()) 

# No parameters are needed: Keras works out the input from the previous layer automatically.

In [27]:
# Fully connected hidden layer that takes the flattened conv/pooling features.
# Computer vision is a complex problem, so a fairly large layer (128 units) is used.

cnn.add(
    tf.keras.layers.Dense(
        units=128,
        activation="relu",
    )
)

In [28]:
# Output layer. This is a binary classification (dog or cat), so a single unit
# with a sigmoid activation is enough. For a multi-class classification problem
# the activation would be softmax instead.
cnn.add(
    tf.keras.layers.Dense(
        units=1,
        activation="sigmoid",
    )
)

In [29]:
# Configure training: adam optimizer, binary cross-entropy loss (matches the
# single sigmoid output unit) and accuracy as the reported metric.
cnn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [30]:
# Train and evaluate in one go: because validation_data is supplied, each epoch
# consists of a training pass over training_set followed by an evaluation pass
# over test_set.
# NOTE(review): the test set doubles as the validation set here, so the reported
# validation accuracy is not a fully independent estimate.

cnn.fit(
    x=training_set,
    validation_data=test_set,
    epochs=25,
)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<tensorflow.python.keras.callbacks.History at 0x1f942d7bac8>

In [31]:
import numpy as np
from keras.preprocessing import image

In [38]:
# The image given to predict() must be preprocessed exactly like the images the
# network was trained on: resized to 64x64 and rescaled to the [0, 1] range.

test_image = image.load_img('single_prediction/cat_new.jpg',target_size=(64,64))

# Convert the PIL image to a 3D NumPy array (height x width x channels).

test_image = image.img_to_array(test_image)  # Now test_image is a NumPy array. Dim: 64x64x3

# Apply the same feature scaling the data generators use (rescale=1./255).
# Without this step the network receives pixel values in [0, 255] although it
# was trained on values in [0, 1], which makes the prediction unreliable.

test_image = test_image / 255.0

# The network was trained on batches of images (32 per batch), so predict() also
# expects a batch: add a leading batch dimension that holds our single image.

test_image = np.expand_dims(test_image, axis=0)  # Dim: 1x64x64x3

# The input is a batch (with only one image in it), so the output also has a
# batch dimension: one row per image.
result = cnn.predict(test_image)

In [39]:
# result has one row per image in the batch and one column per output unit, so
# result[0][0] is the sigmoid output for the only image we passed in.
# NOTE(review): the labels assume class index 1 is dog and 0 is cat - confirm
# with training_set.class_indices.

prediction = "Dog" if result[0][0] > 0.5 else "Cat"

In [40]:
# Show the predicted label for the single image.
print(prediction)

Dog
