In [1]:
import os
import random
import shutil
from shutil import copyfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
# Root folder of the dataset; the training/ and testing/ trees are created beneath it.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
root_dir = '/Users/vuhan/Desktop/dog-vs-cat-classification/data'

# Disabled reset: if re-enabled, this removes root_dir and everything inside it.
# if os.path.exists(root_dir):
#     shutil.rmtree(root_dir)

def create_train_test_dir(root_path):
    """Create the training/testing folder tree used by the notebook.

    The following folders are created under ``root_path`` (parents included):
    ``training/cats``, ``training/dogs``, ``testing/cats``, ``testing/dogs``.

    Args:
        root_path: Directory under which the tree is created.

    The call is idempotent: folders that already exist are left untouched, so
    re-running the cell does not raise ``FileExistsError``.
    """
    for split in ('training', 'testing'):
        for label in ('cats', 'dogs'):
            # makedirs creates the intermediate split folder as well;
            # exist_ok=True makes a second run a no-op instead of an error.
            os.makedirs(os.path.join(root_path, split, label), exist_ok=True)


In [3]:
# Build the training/ and testing/ folder tree (with cats/ and dogs/ subfolders) under root_dir.
create_train_test_dir(root_path = root_dir)

In [4]:
# Print the full path of every sub-directory found anywhere beneath root_dir.
for parent, child_dirs, _ in os.walk(root_dir):
    for child in child_dirs:
        print(os.path.join(parent, child))

/Users/vuhan/Desktop/dog-vs-cat-classification/data/dogs
/Users/vuhan/Desktop/dog-vs-cat-classification/data/cats
/Users/vuhan/Desktop/dog-vs-cat-classification/data/training
/Users/vuhan/Desktop/dog-vs-cat-classification/data/testing
/Users/vuhan/Desktop/dog-vs-cat-classification/data/training/dogs
/Users/vuhan/Desktop/dog-vs-cat-classification/data/training/cats
/Users/vuhan/Desktop/dog-vs-cat-classification/data/testing/dogs
/Users/vuhan/Desktop/dog-vs-cat-classification/data/testing/cats


In [5]:
def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE, seed=None):
    """Randomly split the non-empty files of SOURCE into TRAINING and TESTING copies.

    Args:
        SOURCE: Directory holding the original files.
        TRAINING: Existing directory that receives the training share.
        TESTING: Existing directory that receives the remaining files.
        SPLIT_SIZE: Fraction (0..1) of the usable files copied to TRAINING;
            the count is ``int(number_of_files * SPLIT_SIZE)``.
        seed: Optional seed for a reproducible shuffle. With the default
            ``None`` the module-level ``random`` state is used, as before.

    Zero-byte files are reported with a message and left out of both sets.
    Entries that are not regular files (e.g. sub-directories) are skipped,
    because ``copyfile`` cannot copy them. Files are copied, never moved.
    """
    files = []
    # Sorted so that a given seed always produces the same split, independent
    # of the arbitrary order returned by os.listdir.
    for filename in sorted(os.listdir(SOURCE)):
        # os.path.join works whether or not SOURCE ends with a separator.
        path = os.path.join(SOURCE, filename)
        if not os.path.isfile(path):
            continue
        if os.path.getsize(path) > 0:
            files.append(filename)
        else:
            print('File {} is empty file.'.format(filename))

    training_size = int(len(files) * SPLIT_SIZE)
    rng = random if seed is None else random.Random(seed)
    shuffle_set = rng.sample(files, len(files))

    assignments = (
        (shuffle_set[:training_size], TRAINING),
        (shuffle_set[training_size:], TESTING),
    )
    for subset, destination in assignments:
        for filename in subset:
            copyfile(os.path.join(SOURCE, filename),
                     os.path.join(destination, filename))

In [6]:
# Folders holding the original cat and dog images (inputs to split_data).
# NOTE(review): absolute, machine-specific paths. The trailing '/' matters when
# callers build file paths by plain string concatenation.
CAT_SOURCE_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/cats/'
DOG_SOURCE_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/dogs/'

# Parent folders of the two splits; each contains one sub-folder per class.
TRAINING_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/training/'
TESTING_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/testing/'

# Destination folders for the cat images of each split.
TRAINING_CAT_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/training/cats/'
TESTING_CAT_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/testing/cats/'

# Destination folders for the dog images of each split.
TRAINING_DOG_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/training/dogs/'
TESTING_DOG_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/testing/dogs/'

# Fraction of each class copied to the training split; the rest goes to testing.
SPLIT_SIZE = 0.9

# Remove every entry from the four split folders before a fresh split is copied in.
for split_dir in (TRAINING_CAT_DIR, TRAINING_DOG_DIR, TESTING_CAT_DIR, TESTING_DOG_DIR):
    for entry in os.scandir(split_dir):
        os.remove(entry.path)



# Copy each class into its training/testing folders using the configured ratio.
split_data(CAT_SOURCE_DIR, TRAINING_CAT_DIR, TESTING_CAT_DIR, SPLIT_SIZE)
split_data(DOG_SOURCE_DIR, TRAINING_DOG_DIR, TESTING_DOG_DIR, SPLIT_SIZE)

# Report how many files ended up in each folder.
print('There are {} images on cats for training.'.format(len(os.listdir(TRAINING_CAT_DIR))))
print('There are {} images on dogs for training.'.format(len(os.listdir(TRAINING_DOG_DIR))))
print('There are {} images on cats for testing.'.format(len(os.listdir(TESTING_CAT_DIR))))
# The dog testing count must come from the dog folder (it previously re-read the cat folder).
print('There are {} images on dogs for testing.'.format(len(os.listdir(TESTING_DOG_DIR))))

There are 11250 images on cats for training.
There are 11250 images on dogs for training.
There are 1250 images on cats for testing.
There are 1250 images on dogs for testing.


In [7]:
# Split parent folders passed to train_test_generator; each holds one sub-folder per class.
# NOTE(review): these repeat the values already assigned in the data-split cell.
TRAINING_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/training/'
TESTING_DIR = '/Users/vuhan/Desktop/dog-vs-cat-classification/data/testing/'

def train_test_generator(train_dir, test_dir, target_size = (128, 128), batch_size = 32):
    """Build the Keras image generators for the training and testing folders.

    Args:
        train_dir: Folder with one sub-folder per class, used for training.
        test_dir: Folder with one sub-folder per class, used for evaluation.
        target_size: (height, width) every image is resized to. Defaults to
            (128, 128), the size previously hard-coded here.
        batch_size: Number of images per batch. Defaults to 32.

    Returns:
        A ``(train_generator, test_generator)`` tuple as returned by
        ``flow_from_directory`` with ``class_mode='binary'``.

    Only the training images are augmented (rotation, zoom, shear, shifts,
    horizontal flip); both generators rescale pixel values by 1/255.
    """
    train_datagen = ImageDataGenerator(rescale = 1./255,
                                       rotation_range = 40,
                                       zoom_range = 0.2,
                                       shear_range = 0.2,
                                       horizontal_flip = True,
                                       width_shift_range = 0.2,
                                       height_shift_range = 0.2,
                                       fill_mode = 'nearest')

    train_generator = train_datagen.flow_from_directory(train_dir,
                                                        target_size = target_size,
                                                        batch_size = batch_size,
                                                        class_mode = 'binary')

    # No augmentation for evaluation data: only the same rescaling is applied.
    test_datagen = ImageDataGenerator(rescale = 1./255)

    test_generator = test_datagen.flow_from_directory(test_dir,
                                                      target_size = target_size,
                                                      batch_size = batch_size,
                                                      class_mode = 'binary')
    return train_generator, test_generator

In [8]:
# Create the batch generators that read images from the training and testing folders.
train_generator, test_generator = train_test_generator(TRAINING_DIR, TESTING_DIR)

Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


In [9]:
# Small CNN: two convolution/max-pooling stages, then flatten, dropout and a
# dense head that ends in a single sigmoid unit (binary output).
cnn_layers = [
    tf.keras.layers.Conv2D(filters = 32, kernel_size = (3, 3),
                           input_shape = (128, 128, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D(pool_size = 2, strides = 2),
    tf.keras.layers.Conv2D(filters = 32, kernel_size = (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D(pool_size = 2, strides = 2),
    # Disabled experiment: a third convolution/pooling stage.
    #     tf.keras.layers.Conv2D(64, (3, 3), activation = 'relu'),
    #     tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(rate = 0.2),
    # Disabled experiment: an extra, wider dense layer.
    #     tf.keras.layers.Dense(256, activation = 'relu'),
    tf.keras.layers.Dense(units = 128, activation = 'relu'),
    tf.keras.layers.Dense(units = 1, activation = 'sigmoid'),
]
model = tf.keras.models.Sequential(cnn_layers)

# Binary cross-entropy matches the single sigmoid output.
model.compile(optimizer = 'adam',
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 63, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 30, 30, 32)       0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 28800)             0         
                                                                 
 dropout (Dropout)           (None, 28800)             0

In [10]:
import time
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop once val_loss has failed to improve by at least 0.001 for 5 consecutive
# epochs. restore_best_weights=True rolls the model back to its best epoch;
# without it the model keeps the weights of the last (worse) epoch.
early_stop = EarlyStopping(monitor = 'val_loss', patience = 5, min_delta = 0.001,
                           restore_best_weights = True)
# Only overwrite the checkpoint when val_loss improves, so the file on disk is
# the best model seen so far rather than simply the most recent epoch.
model_check = ModelCheckpoint('dogcat_checkpoint.h5', monitor = 'val_loss',
                              save_best_only = True)

# NOTE(review): validation_data is the testing split, so early stopping and
# checkpoint selection are tuned on it; a separate validation split would keep
# the test score unbiased.
# perf_counter is a monotonic clock, so the elapsed time is unaffected by
# system clock adjustments during a long training run.
t1 = time.perf_counter()
history = model.fit(train_generator,
                    validation_data = test_generator,
                    epochs = 150,
                    callbacks = [early_stop, model_check])
t2 = time.perf_counter()
print('\nCNN training took: {:.2f} minutes.'.format((t2 - t1) / 60))

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150

KeyboardInterrupt: 