# Plan

1. Rearrange the files into train/valid/test folders, with train and valid each split into two class subfolders: dogs and cats
2. Create a small sample set that mirrors the above folder structure
3. Fine-tune and train the model
4. Generate predictions
5. Validate predictions
6. Submit to Kaggle

## Rearrange files

In [1]:
import os, sys, glob, numpy as np
current_dir = os.getcwd()
CODE_DIR = current_dir
DATA_DIR = current_dir + "/data"
%pwd

'/home/niraj/FastAI/catsdogs'

In [2]:
%cd $DATA_DIR
%mkdir -p train/dogs
%mkdir -p train/cats
%mkdir -p sample/test
%mkdir -p sample/train/dogs
%mkdir -p sample/train/cats
%mkdir -p sample/valid/dogs
%mkdir -p sample/valid/cats
%mkdir -p valid/dogs
%mkdir -p valid/cats
%mkdir results

/home/niraj/FastAI/catsdogs/data


## Todo: Make a subfolder (e.g. `unknown`) inside the test folder and move all the test images into that folder

In [3]:
%cd $DATA_DIR/train
images = glob.glob("*.jpg")
print(len(images))
for i in range(25000):
    if("cat" in images[i]):
        os.rename(images[i], DATA_DIR + "/train/cats/" + images[i])
    else:
        os.rename(images[i], DATA_DIR + "/train/dogs/" + images[i])

/home/niraj/FastAI/catsdogs/data/train
25000


In [4]:
%cd $DATA_DIR/train/dogs
dog_images = glob.glob("*.jpg")
for i in range(1000):
    os.rename(dog_images[i], DATA_DIR + "/valid/dogs/" + dog_images[i])

/home/niraj/FastAI/catsdogs/data/train/dogs


In [5]:
%cd $DATA_DIR/train/cats
cat_images = glob.glob("*.jpg")
for i in range(1000):
    os.rename(cat_images[i], DATA_DIR + "/valid/cats/" + cat_images[i])   

/home/niraj/FastAI/catsdogs/data/train/cats


In [6]:
from shutil import copyfile

In [7]:
%cd $DATA_DIR/train/dogs
dog_images = glob.glob("*.jpg")
for i in range(100):
    copyfile(dog_images[i], DATA_DIR + "/sample/train/dogs/" + dog_images[i])

%cd $DATA_DIR/train/cats
cat_images = glob.glob("*.jpg")
for i in range(100):
    copyfile(cat_images[i], DATA_DIR + "/sample/train/cats/" + cat_images[i]) 

/home/niraj/FastAI/catsdogs/data/train/dogs
/home/niraj/FastAI/catsdogs/data/train/cats


In [8]:
%cd $DATA_DIR/valid/dogs
dog_images = glob.glob("*.jpg")
for i in range(25):
    copyfile(dog_images[i], DATA_DIR + "/sample/valid/dogs/" + dog_images[i])

%cd $DATA_DIR/valid/cats
cat_images = glob.glob("*.jpg")
for i in range(25):
    copyfile(cat_images[i], DATA_DIR + "/sample/valid/cats/" + cat_images[i])

/home/niraj/FastAI/catsdogs/data/valid/dogs
/home/niraj/FastAI/catsdogs/data/valid/cats


In [2]:
# Switch the working directory back to CODE_DIR (set in the first cell, which
# therefore has to be run first on a fresh kernel).
# NOTE(review): presumably needed so the `utils` / `vgg16` imports in the next
# cell resolve from the code directory — confirm.
%cd $CODE_DIR

/home/niraj/FastAI/catsdogs


# Import necessary modules and set appropriate paths

In [2]:
%matplotlib inline
import json
from importlib import reload
import utils; reload(utils)
from utils import plot
from matplotlib import pyplot as plt
import vgg16; reload(vgg16)
from vgg16 import Vgg16

Using TensorFlow backend.
  return f(*args, **kwds)


In [3]:
# All training in this notebook runs against the small sample set under
# DATA_DIR/sample; weights go to DATA_DIR/results and the full test set
# lives in DATA_DIR/test.
path = "{}/sample".format(DATA_DIR)
train_path, valid_path = path + "/train", path + "/valid"
result_path = "{}/results".format(DATA_DIR)
test_path = "{}/test".format(DATA_DIR)

In [4]:
# Training knobs: images per training batch, and how many passes over the
# training data the fit loop makes.
batch_size, no_of_epochs = 8, 1

# Finetune over batches of images

In [5]:
# Build the VGG16 wrapper and the train / validation batch generators.
# Validation uses a batch twice as large as training.
vgg = Vgg16()
batches = vgg.get_batches(train_path, batch_size=batch_size)
val_batches = vgg.get_batches(valid_path, batch_size=batch_size*2)

# Attach nb_class / nb_sample to both generators.
# NOTE(review): presumably compatibility attributes read by the vgg16 / utils
# helpers — confirm against those modules.
for generator in (batches, val_batches):
    generator.nb_class = 2
    generator.nb_sample = generator.samples

vgg.finetune(batches)

  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))
  model.add(Convolution2D(filters, 3, 3, activation='relu'))


Found 200 images belonging to 2 classes.
Found 50 images belonging to 2 classes.


# Train the model

In [10]:
# Fit one epoch at a time and checkpoint the weights after each, so an
# interrupted run still leaves the most recent completed epoch on disk.
# Checkpoints are named ft<epoch index>.h5, with the index starting at 0.
latest_weights_filename = None
for epoch in range(no_of_epochs):
    print("Running epoch: %d" % (epoch + 1))
    vgg.fit(batches, val_batches, batch_size, nb_epoch=1)
    latest_weights_filename = "ft{}.h5".format(epoch)
    checkpoint_file = result_path + "/" + latest_weights_filename
    vgg.model.save_weights(checkpoint_file)
print("Completed %d fit operations" % no_of_epochs)

Running epoch: 1
Epoch 1/1


  validation_data=val_batches, validation_steps=int(np.ceil(val_batches.nb_sample/batch_size)))
  validation_data=val_batches, validation_steps=int(np.ceil(val_batches.nb_sample/batch_size)))


KeyboardInterrupt: 

# Generate predictions

In [6]:
# Restore the weights checkpointed after the first epoch (ft0.h5).
# The "/" separator is required: result_path has no trailing slash, and the
# training loop saves to result_path + "/" + filename, so without it this
# would look for ".../resultsft0.h5" instead of ".../results/ft0.h5".
vgg.model.load_weights(result_path + "/" + "ft0.h5")
vgg.model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_1 (Lambda)            (None, 3, 224, 224)       0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 3, 226, 226)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 224, 224)      1792      
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 64, 226, 226)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 224, 224)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 112, 112)      0         
_________________________________________________________________
zero_padding2d_3 (ZeroPaddin (None, 64, 114, 114)      0         
__________

In [7]:
# Run the model over the sample test set.
# Note: this rebinds `batches`, which until now held the training generator.
sample_test_dir = "{}/sample/test".format(DATA_DIR)
batches, predictions = vgg.test(sample_test_dir, batch_size=batch_size)

Found 24 images belonging to 1 classes.


In [10]:
# Show the test image paths in the order the generator lists them.
# NOTE(review): rows of `predictions` are presumably in this same order —
# confirm against vgg.test before pairing them up.
batches.filenames

['unknown/1.jpg',
 'unknown/10.jpg',
 'unknown/11.jpg',
 'unknown/12.jpg',
 'unknown/13.jpg',
 'unknown/14.jpg',
 'unknown/15.jpg',
 'unknown/16.jpg',
 'unknown/17.jpg',
 'unknown/18.jpg',
 'unknown/19.jpg',
 'unknown/2.jpg',
 'unknown/20.jpg',
 'unknown/21.jpg',
 'unknown/22.jpg',
 'unknown/23.jpg',
 'unknown/24.jpg',
 'unknown/3.jpg',
 'unknown/4.jpg',
 'unknown/5.jpg',
 'unknown/6.jpg',
 'unknown/7.jpg',
 'unknown/8.jpg',
 'unknown/9.jpg']