In [25]:
import os
import random
from math import floor
# Dataset locations, relative to the notebook's working directory.
test_dir = "../data/test"
train_dir = "../data/train"
validation_dir = "../data/validation"

In [39]:
#Code used to separate training data into separate folders
#Does not need to be run since data is included in repo
def getCatsAndDogs(directory):
    """Split the files found directly in ``directory`` into cats and dogs.

    A file is classified by the first character of its name: 'c' means cat,
    'd' means dog.  Entries that are not regular files (for example the
    "cats"/"dogs" sub-folders that makeCatsDogsDirs moves images into) and
    files with any other prefix (e.g. hidden OS files) are ignored instead of
    being counted as dogs.

    Args:
        directory: path of the folder to scan (not recursive).

    Returns:
        A ``(cats, dogs)`` tuple of lists of bare file names, each in sorted
        order so the result does not depend on the OS listing order.
    """
    cats = []
    dogs = []
    for name in sorted(os.listdir(directory)):
        # Skip sub-directories and anything else that is not a plain file.
        if not os.path.isfile(os.path.join(directory, name)):
            continue
        if name.startswith('c'):
            cats.append(name)
        elif name.startswith('d'):
            dogs.append(name)
    return cats, dogs

def makeCatsDogsDirs(trainDir):
    """Move the images in ``trainDir`` into "cats" and "dogs" sub-folders.

    The file names are obtained from getCatsAndDogs(trainDir).  Each class is
    moved independently, so every file is relocated even when the two classes
    have a different number of images (pairing them with ``zip`` would stop
    at the shorter list).  The target folders are created when missing.

    Args:
        trainDir: folder that holds the unsorted training images.
    """
    # Scan before creating the sub-folders so they are not part of the listing.
    cats, dogs = getCatsAndDogs(trainDir)
    for label, names in (("cats", cats), ("dogs", dogs)):
        target = os.path.join(trainDir, label)
        os.makedirs(target, exist_ok=True)
        for name in names:
            curr = os.path.join(trainDir, name)
            # Only move regular files; never try to move a folder into itself.
            if not os.path.isfile(curr):
                continue
            os.rename(curr, os.path.join(target, name))
        
def createValidationDirs(valDir,trainDir,valSplit):
    """Move a random fraction of the training images into a validation set.

    For each class folder ("cats", "dogs") under ``trainDir``,
    ``floor(count * valSplit)`` randomly chosen files are moved to the folder
    of the same name under ``valDir``.  The classes are handled independently,
    so the full sample of each class is moved even when the two sample sizes
    differ.  Missing validation folders are created.

    Args:
        valDir: root folder of the validation set.
        trainDir: root folder of the training set; must contain "cats" and
            "dogs" sub-folders.
        valSplit: fraction of each class to move, between 0 and 1 inclusive.

    Raises:
        ValueError: if ``valSplit`` is outside the range [0, 1].
    """
    if not 0 <= valSplit <= 1:
        raise ValueError("valSplit must be between 0 and 1, got %r" % (valSplit,))

    # Decide everything first so a missing class folder fails before any move.
    plan = []
    for label in ("cats", "dogs"):
        srcDir = os.path.join(trainDir, label)
        # Sorted so that a seeded `random` gives a reproducible selection.
        files = sorted(os.listdir(srcDir))
        numVal = floor(len(files) * valSplit)
        chosen = random.sample(files, numVal)
        plan.append((srcDir, os.path.join(valDir, label), chosen))

    # Move the selected files into the validation folders.
    for srcDir, dstDir, chosen in plan:
        os.makedirs(dstDir, exist_ok=True)
        for name in chosen:
            os.rename(os.path.join(srcDir, name), os.path.join(dstDir, name))
    

In [54]:
from keras.applications import VGG19

# VGG19 convolutional base with ImageNet weights and without its top
# (fully connected) layers, sized for 150x150 RGB inputs.
conv_base = VGG19(include_top=False, weights='imagenet', input_shape=(150, 150, 3))

# Mark the base as non-trainable so its weights are not updated during training.
conv_base.trainable = False

In [55]:
conv_base.summary()
# Inspect the output shape of the final layer so the classifier can be fitted to it.

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
__________

In [57]:
from keras import layers
from keras import models
from keras import optimizers

# Binary classifier: the convolutional base followed by a small dense head
# ending in a single sigmoid unit.
model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [64]:
from keras.preprocessing.image import ImageDataGenerator
import numpy as np
from IPython.display import clear_output


# Training images are rescaled by 1/255 and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.2, zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Batches of 100 images resized to 150x150, with binary labels taken from
# the sub-folders of train_dir.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    class_mode='binary',
    batch_size=100,
    target_size=(150, 150),
)

# Validation images are only rescaled by 1/255 -- no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Batches of 25 images resized to 150x150, with binary labels taken from
# the sub-folders of validation_dir.
validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    class_mode='binary',
    batch_size=25,
    target_size=(150, 150),
)

# Binary cross-entropy loss with RMSprop at a learning rate of 2e-5;
# accuracy is tracked as the metric.
model.compile(
    optimizer=optimizers.RMSprop(lr=2e-5),
    loss='binary_crossentropy',
    metrics=['acc'],
)
# Samples seen per epoch = batch_size * steps:
#   training:   100 * 50 = 5000 (a quarter of the data, to reduce run time)
#   validation:  25 * 50 = 1250 (also a quarter)
history = model.fit_generator(
    train_generator,
    epochs=30,
    steps_per_epoch=50,
    validation_data=validation_generator,
    validation_steps=50,
)

Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
