## Classification of Cats vs Dogs using a CNN with TensorFlow

## Download and unzip the dataset

In [0]:
import zipfile
import os
import random
from shutil import copyfile

In [2]:
# If the URL doesn't work, visit https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765
# And right click on the 'Download Manually' link to get a new URL to the dataset

# Note: This is a very large dataset and will take time to download

!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" \
    -O "/tmp/cats-and-dogs.zip"

# Unpack the downloaded archive into /tmp. The context manager closes the
# archive even if extraction fails part-way (e.g. truncated download).
local_zip = '/tmp/cats-and-dogs.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('/tmp')

--2019-05-31 09:13:13--  https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip
Resolving download.microsoft.com (download.microsoft.com)... 23.44.100.235, 2a02:26f0:c800:29a::e59, 2a02:26f0:c800:28d::e59
Connecting to download.microsoft.com (download.microsoft.com)|23.44.100.235|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 824894548 (787M) [application/octet-stream]
Saving to: ‘/tmp/cats-and-dogs.zip’


2019-05-31 09:13:42 (27.2 MB/s) - ‘/tmp/cats-and-dogs.zip’ saved [824894548/824894548]



## Define paths for data

In [0]:
# Create the train/test folder tree, one sub-folder per class.
# os.makedirs(..., exist_ok=True) creates missing parents and is a no-op for
# folders that already exist, so re-running this cell (or running it on a
# partially created tree) still yields every folder. Genuine failures such as
# permission errors are raised instead of being silently ignored.
for _subdir in ('train/cats', 'train/dogs', 'test/cats', 'test/dogs'):
    os.makedirs(os.path.join('/tmp/cats_vs_dogs', _subdir), exist_ok=True)

## Split the data into train and test

In [4]:
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):
    """Randomly copy the files in SOURCE into TRAINING and TESTING folders.

    Zero-length files are reported and skipped. The remaining files are
    shuffled; the first ``int(n * SPLIT_SIZE)`` go to TRAINING and all the
    others go to TESTING, so the two sets never overlap.

    Args:
        SOURCE: directory holding the original files.
        TRAINING: existing directory that receives the training share.
        TESTING: existing directory that receives the testing share.
        SPLIT_SIZE: fraction (0..1) of usable files assigned to training.
    """
    files = []
    for filename in os.listdir(SOURCE):
        # os.path.join works with or without a trailing separator on SOURCE.
        if os.path.getsize(os.path.join(SOURCE, filename)) > 0:
            files.append(filename)
        else:
            print(filename + " is zero length, so ignoring.")

    training_length = int(len(files) * SPLIT_SIZE)
    shuffled_set = random.sample(files, len(files))
    training_set = shuffled_set[:training_length]
    # Slice from the split point: a negative-length slice ([-0:]) would
    # return the whole list when no file is left over for testing.
    testing_set = shuffled_set[training_length:]

    for filename in training_set:
        copyfile(os.path.join(SOURCE, filename),
                 os.path.join(TRAINING, filename))

    for filename in testing_set:
        copyfile(os.path.join(SOURCE, filename),
                 os.path.join(TESTING, filename))


# Source folders (from the extracted archive) and destination folders for
# the train/test split of each class.
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "/tmp/cats_vs_dogs/train/cats/"
TESTING_CATS_DIR = "/tmp/cats_vs_dogs/test/cats/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "/tmp/cats_vs_dogs/train/dogs/"
TESTING_DOGS_DIR = "/tmp/cats_vs_dogs/test/dogs/"

# Fraction of each class used for training; the rest is used for testing.
split_size = .9

# Seed the shuffle so the split is reproducible. Without a seed, re-running
# this cell draws a different split and can copy the same image into both
# the train and the test folders.
# NOTE(review): this assumes os.listdir returns the files in the same order
# on every run - confirm on the target filesystem.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

# Expected output
# 666.jpg is zero length, so ignoring.
# 11702.jpg is zero length, so ignoring.

666.jpg is zero length, so ignoring.
11702.jpg is zero length, so ignoring.


## Main code starts

In [0]:
# import all modules

import numpy as np
import matplotlib.image  as mpimg
import matplotlib.pyplot as plt

import tensorflow as tf
from keras.preprocessing import image
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [7]:
## define metadata

print("\nDefining metadata")

# Image height/width: used as the generators' target_size and the model's
# input_shape.
Nrows, Ncols = 150, 150
BATCH_SIZE = 100        # batch size of the training generator
NUM_EPOCHS = 2          # number of training epochs
FILTER_SIZE = (3, 3)    # kernel size of every Conv2D layer

# The file name encodes the hyper-parameters above, so each configuration
# gets its own saved model.
model_path = "".join([
    "/tmp/cats_vs_dogs/models/model",
    "_R", str(Nrows),
    "_C", str(Ncols),
    "_fs", str(FILTER_SIZE[0]),
    "_ep", str(NUM_EPOCHS),
    ".h5",
])

print("\nMetadata defined")


Defining metadata

Metadata defined


In [0]:
# Function to plot Loss and Accuracy of a model

def plot_results(history):
    """Plot per-epoch training/validation accuracy and loss.

    Draws two figures: accuracy first, then loss. Each has a legend.

    Args:
        history: object whose ``history`` attribute is a dict of per-epoch
            lists with the keys 'loss', 'val_loss' and either
            'acc'/'val_acc' or 'accuracy'/'val_accuracy'.
    """
    metrics = history.history
    # The accuracy key follows the metric name given to compile(); accept
    # both spellings instead of failing with a KeyError.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    # Get number of epochs
    epochs = range(len(acc))

    # Plot training and validation accuracy per epoch.
    # The legend text must be passed as label=...; a fourth positional
    # argument is not interpreted as a label by plt.plot.
    plt.figure()
    plt.plot(epochs, acc, 'r', label="Training Accuracy")
    plt.plot(epochs, val_acc, 'b', label="Validation Accuracy")
    plt.title('Training and validation accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Plot training and validation loss per epoch on a separate figure.
    plt.figure()
    plt.plot(epochs, loss, 'r', label="Training Loss")
    plt.plot(epochs, val_loss, 'b', label="Validation Loss")
    plt.title('Training and validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()


In [0]:
# define callbacks

class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once accuracy exceeds 99%."""

    def on_epoch_end(self, epochs, logs=None):
        """Request a stop when the epoch's training accuracy is above 0.99.

        Args:
            epochs: index of the epoch that just finished (unused).
            logs: dict of metric values for the epoch; may be None.
        """
        # Avoid a shared mutable default and tolerate a missing logs dict.
        logs = logs or {}
        # The metric is reported under the name given to compile(); accept
        # both spellings and skip the check when neither is present, rather
        # than raising TypeError on `None > 0.99`.
        acc = logs.get('acc')
        if acc is None:
            acc = logs.get('accuracy')
        if acc is not None and acc > 0.99:
            self.model.stop_training = True
            print ("\nStopping training as accuracy is above 99%")
callback = myCallback()

In [0]:
# define model

def fetch_model(train_gen, val_gen):
    """Return the model saved at ``model_path``, or train and save a new one.

    If a file exists at the module-level ``model_path`` it is loaded and
    returned. Otherwise, or if loading fails, a small CNN is built, trained
    on ``train_gen`` for ``NUM_EPOCHS`` epochs, saved to ``model_path`` and
    its training curves are plotted.

    Args:
        train_gen: generator yielding training batches.
        val_gen: generator yielding validation batches.

    Returns:
        The loaded or freshly trained Keras model.
    """
    print("\nLoading saved model")
    if os.path.isfile(model_path):
        try:
            model = tf.keras.models.load_model(model_path)
        except Exception as err:
            # Best effort: an unreadable file falls back to retraining, but
            # the real reason is reported. KeyboardInterrupt and SystemExit
            # are deliberately not caught.
            print("\nCould not load saved model (%s). Training new model..." % err)
        else:
            print("\nmodel loaded")
            return model
    else:
        print("\nModel not found. Training new model...")

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16, FILTER_SIZE, activation='relu',
                               input_shape=(Nrows, Ncols, 3)),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(32, FILTER_SIZE, activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(64, FILTER_SIZE, activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        # Single sigmoid unit: binary cat-vs-dog output.
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    ## compile model
    # The learning rate is passed positionally because its keyword name
    # (`lr` vs `learning_rate`) differs between Keras versions.
    model.compile(optimizer=RMSprop(0.001),
                  loss='binary_crossentropy',
                  metrics=['acc'])
    model.summary()
    ## fit model to data - training
    history = model.fit_generator(train_gen,
                                  epochs=NUM_EPOCHS,
                                  validation_data=val_gen,
                                  verbose=1,
                                  callbacks=[callback])
    print("\nNew model trained")
    ## save model to file
    print("\nSaving model for later use...")
    # Make sure the target folder exists; nothing else in the notebook
    # creates the models directory.
    model_dir = os.path.dirname(model_path)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    model.save(model_path)
    print("\nModel Successfully saved")
    ## plot results
    print("\nPlotting results...")
    plot_results(history)
    print("\n........................")
    return model


# cats vs dogs classification


In [12]:
## load data - change directories to the location of data

print("\nLoading data...")
train_dir, test_dir = "/tmp/cats_vs_dogs/train", "/tmp/cats_vs_dogs/test"

# One generator configuration is shared by both splits: pixel values are
# multiplied by 1/255.0 before being fed to the network.
data_gen = ImageDataGenerator(rescale=1/255.0)

# Training batches use BATCH_SIZE; labels are binary (one value per image).
train_gen = data_gen.flow_from_directory(train_dir,
                                         class_mode='binary',
                                         batch_size=BATCH_SIZE,
                                         target_size=(Nrows, Ncols))

# The test generator does not set batch_size, so the library default applies.
test_gen = data_gen.flow_from_directory(test_dir,
                                        class_mode='binary',
                                        target_size=(Nrows, Ncols))
print("\nData Generators defined")


Loading data...
Found 22498 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.

Data Generators defined


In [0]:
## visualize data


In [0]:
# fetch model (training)
# fetch_model() first tries to load a previously saved model from model_path
# and only builds, trains and saves a new one when loading fails, so this
# cell can finish without any training taking place.
# The test generator is passed as the validation data used during training.

print("\nTraining model...")
model = fetch_model(train_gen, test_gen)
print("\nTraining Complete")




Training model...

Loading saved model
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.

model loaded

Training Complete

Evaluating model on test data


  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))



Evaluations:
[0.8065819791496047, 0.8164734]

Testing Complete


In [13]:
# predict for new uploaded images

import numpy as np
from google.colab import files
from keras.preprocessing import image

uploaded = files.upload()

for fn in uploaded.keys():

    # predicting images
    path = '/content/' + fn
    img = image.load_img(path, target_size=(Nrows, Ncols))
    # Apply the same 1/255 rescaling that the training generator applies;
    # without it the model is fed inputs on a different scale than it was
    # trained on.
    x = image.img_to_array(img) / 255.0
    # Add the leading batch dimension expected by model.predict.
    images = np.expand_dims(x, axis=0)

    classes = model.predict(images, batch_size=10)
    print(classes[0])
    # classes[0][0] is the sigmoid output for the single uploaded image.
    # NOTE(review): "dog" for > 0.5 assumes class indices cats=0, dogs=1 -
    # confirm with train_gen.class_indices.
    if classes[0][0] > 0.5:
        print(fn + " is a dog")
    else:
        print(fn + " is a cat")

MessageError: ignored