This is an exercise to learn Convolutional Neural Networks using TensorFlow. The following code blocks help us classify the Kaggle Cats and Dogs dataset. The procedure is as follows:
1. Get your data into a local or a temp directory
2. Visualize a few images to validate that they are correctly loaded
3. Divide the dataset into training and validation sets
4. Build your custom ConvNet 
5. Check the summary of your model and the parameters to train
6. Compile the model to specify the optimizer, loss function and evaluation metrics
7. Create the Image Data generator to generate your images
8. Finally fit the model to data generated in the previous step
9. Test your images on unseen data 

The dataset was downloaded from the following website: "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip"

In [1]:
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [2]:
#This block unzips the dataset in the tmp folder
import zipfile
local_zip = 'C:/Users/Shubham Kamble/tmp/kagglecatsanddogs_3367a.zip'
# The context manager closes the archive even if extraction raises,
# so the file handle is never leaked.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('C:/Users/Shubham Kamble/tmp')

In [3]:
#This block makes a directory for two classes cats and dogs in the tmp folder
import os
local_dir = 'C:/Users/Shubham Kamble/tmp/cats-vs-dogs'
classes = ['cats','dogs']

# Create <local_dir>/<subset>/<class> for every subset/class pair.
# exist_ok=True keeps the cell safe to re-run when the folders already exist,
# while any other failure (e.g. a permission error) is raised instead of being
# silently swallowed and leaving the tree half-built.
for class_name in classes:
    for subset in ("training", "testing"):
        os.makedirs(os.path.join(local_dir, subset, class_name), exist_ok=True)

In [4]:
#This block helps to stop training when a desired accuracy is reached using callbacks
class myCallback(tf.keras.callbacks.Callback):
    """Callback that stops training once the logged accuracy exceeds a threshold.

    Args:
        threshold: Accuracy, as a fraction, that must be exceeded at the end
            of an epoch for training to be stopped. Defaults to 0.5, the
            value that was previously hard-coded.
    """

    def __init__(self, threshold=0.5):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when logs['accuracy'] is above the threshold.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for the epoch; may be None.
        """
        # `logs=None` replaces the shared mutable `{}` default.
        accuracy = (logs or {}).get('accuracy')
        # Without an 'accuracy' entry there is nothing to compare; comparing
        # None against a float would raise TypeError.
        if accuracy is not None and accuracy > self.threshold:
            print(f"\nReached {self.threshold:.0%} accuracy so cancelling training!")
            self.model.stop_training = True
    

In [5]:
#This blocks defines a function to split the dataset into training and testing sets
import random
from shutil import copyfile
def split(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly copy the files in SOURCE into TRAINING and TESTING folders.

    Zero-length files and anything that is not a regular file (for example a
    sub-directory) are skipped. The remaining files are shuffled; the first
    ``len(files) * SPLIT_SIZE`` of them are copied to TRAINING and the rest to
    TESTING. SOURCE itself is left untouched.

    Args:
        SOURCE: Directory holding the original files (with or without a
            trailing path separator).
        TRAINING: Existing directory that receives the training share.
        TESTING: Existing directory that receives the testing share.
        SPLIT_SIZE: Fraction of the usable files that go to TRAINING.
    """
    # os.path.join (rather than SOURCE + name) keeps the size check pointed at
    # the same path the copy step uses, even without a trailing separator.
    files = [
        name
        for name in os.listdir(SOURCE)
        if os.path.isfile(os.path.join(SOURCE, name))
        and os.path.getsize(os.path.join(SOURCE, name)) > 0
    ]

    # Sampling the full population returns a shuffled copy of the list.
    files = random.sample(files, len(files))
    split_point = len(files) * SPLIT_SIZE

    for i, name in enumerate(files):
        target_dir = TRAINING if i < split_point else TESTING
        copyfile(os.path.join(SOURCE, name), os.path.join(target_dir, name))


In [6]:
#Apply split(..) to each class so cats and dogs both get a training and a testing folder
split_size = .9

# Source folder plus the two destination folders for the cat images
CAT_SOURCE_DIR = "C:/Users/Shubham Kamble/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "C:/Users/Shubham Kamble/tmp/cats-vs-dogs/training/cats/"
TESTING_CATS_DIR = "C:/Users/Shubham Kamble/tmp/cats-vs-dogs/testing/cats/"

# Source folder plus the two destination folders for the dog images
DOG_SOURCE_DIR = "C:/Users/Shubham Kamble/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "C:/Users/Shubham Kamble/tmp/cats-vs-dogs/training/dogs/"
TESTING_DOGS_DIR = "C:/Users/Shubham Kamble/tmp/cats-vs-dogs/testing/dogs/"

# Cats first, then dogs, each with the same training fraction
for source_dir, training_dir, testing_dir in (
    (CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR),
    (DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR),
):
    split(source_dir, training_dir, testing_dir, split_size)

In [7]:
#Print the number of files in each split folder to confirm the split worked
for split_dir in (
    'C:/Users/Shubham Kamble/tmp/cats-vs-dogs/training/cats/',
    'C:/Users/Shubham Kamble/tmp/cats-vs-dogs/training/dogs/',
    'C:/Users/Shubham Kamble/tmp/cats-vs-dogs/testing/cats/',
    'C:/Users/Shubham Kamble/tmp/cats-vs-dogs/testing/dogs/',
):
    file_count = len(os.listdir(split_dir))
    print(file_count)

# Expected output (training cats, training dogs, testing cats, testing dogs):
# 11250
# 11250
# 1250
# 1250

11250
11250
1250
1250


In [8]:
#Assemble a simple ConvNet one layer at a time, then compile it with the
#optimizer, loss and evaluation metric used for training
model = tf.keras.models.Sequential()

# Three convolution + max-pooling stages with 16, 32 and 64 filters;
# the first layer also declares the 150x150 RGB input.
model.add(tf.keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Classifier head: flatten, one hidden dense layer, single sigmoid output
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer=RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 148, 148, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 74, 74, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 72, 72, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 36, 36, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 34, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 17, 17, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 18496)             0

In [9]:
# Folders that hold one sub-folder per class for each split
TRAINING_DIR = "C:/Users/Shubham Kamble/tmp/cats-vs-dogs/training/"
VALIDATION_DIR = "C:/Users/Shubham Kamble/tmp/cats-vs-dogs/testing/"

# Training images: pixel values rescaled by 1/255, resized to 150x150,
# served in batches of 100 with binary labels.
train_datagen = ImageDataGenerator(rescale=1.0/255.)
train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=100,
)

# Validation images go through the same rescaling and batching.
validation_datagen = ImageDataGenerator(rescale=1.0/255.)
validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(150, 150),
    class_mode='binary',
    batch_size=100,
)

Found 22498 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


In [None]:
# Early-stopping callback defined earlier in the notebook
callbacks = myCallback()

# Train for up to 10 epochs, validating on the held-out generator each epoch
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    callbacks=[callbacks],
    epochs=10,
    verbose=1,
)

Epoch 1/10
 14/225 [>.............................] - ETA: 4:19 - loss: 1.4766 - accuracy: 0.5107





In [None]:
#This block helps us to get the evaluation metrics and graph it to check for overfitting
import matplotlib.pyplot as plt

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc)) # Get number of epochs

#Plots the training and validation accuracy per epoch
# The figure is opened before plotting so both curves land on it. Legend text
# is passed as label=...; a fourth positional argument would be read by
# plt.plot as extra data instead of a label.
plt.figure()
plt.plot(epochs, acc, 'r', label="Training Accuracy")
plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

#Plot training and validation loss per epoch
plt.figure()
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()



In [None]:
#This block (written for Google Colab and left commented out) uploads an image and checks whether our model predicts it correctly or not
# import numpy as np
# from google.colab import files
# from keras.preprocessing import image

# uploaded = files.upload()

# for fn in uploaded.keys():
#     # predicting images
#     path = '/content/' + fn
#     img = image.load_img(path, target_size=(150, 150))
#     x = image.img_to_array(img)
#     x = np.expand_dims(x, axis=0)

#     images = np.vstack([x])
#     classes = model.predict(images, batch_size=10)
#     print(classes[0])
#     if classes[0]>0.5:
#         print(fn + " is a dog")
#     else:
#         print(fn + " is a cat")