# Image classification with small data

1. Download and setup dataset (Kaggle Cats & Dogs) https://www.kaggle.com/c/dogs-vs-cats/data
2. Train a small convnet on our small data (acc ~ 0.75)
3. Train an MLP on the bottleneck features of a pretrained model (acc ~ 0.90)
4. Fine-tune the top layers of the pretrained model on our small data (acc ~ 0.94)


In [None]:
# imports
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from keras import applications
import numpy as np
from keras import Model
from keras import optimizers
import matplotlib.pyplot as plt

In [None]:
# define plot & parse functions
def parse_log_file(pfile):
    """Parse a training log file into per-epoch metric lists.

    A line is used only when it contains all four fields ``acc``,
    ``val_acc``, ``loss`` and ``val_loss`` written as ``<key>: <value>``,
    with each value terminated by ``' -'`` or the end of the line, e.g.::

        ... - loss: 0.69 - acc: 0.52 - val_loss: 0.66 - val_acc: 0.60

    (presumably the end-of-epoch summary printed by Keras ``fit`` /
    ``fit_generator`` -- confirm against the logs you actually parse).
    Lines missing any of the four fields are skipped, so the four lists
    always have equal length.

    Args:
        pfile: Path of the text log file to read.

    Returns:
        dict mapping ``'acc'``, ``'val_acc'``, ``'loss'`` and ``'val_loss'``
        to lists of floats, one entry per parsed line, in file order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a matched value cannot be converted to ``float``.
    """
    import re  # local import: keeps this cell independent of the imports cell

    keys = ('acc', 'val_acc', 'loss', 'val_loss')
    # The (?<!\w) look-behind stops 'acc' / 'loss' from matching inside
    # 'val_acc' / 'val_loss', so the field order on the line does not matter.
    patterns = {
        k: re.compile(r'(?<!\w)' + re.escape(k) + r': (.*?)(?: -|$)')
        for k in keys
    }
    log = {k: [] for k in keys}
    # The context manager closes the file even if parsing raises.
    with open(pfile) as f:
        for raw_line in f:
            line = raw_line.strip()
            matches = {k: patterns[k].search(line) for k in keys}
            if not all(matches.values()):
                # Not a complete metrics line (header, callback message,
                # partial progress output, ...): ignore it.
                continue
            # Convert everything first so a bad value cannot leave the
            # lists with different lengths.
            values = {k: float(matches[k].group(1)) for k in keys}
            for k in keys:
                log[k].append(values[k])
    return log

def plot_loss_acc(pfile):
    """Plot the curves parsed from a training log file.

    Two figures are shown one after the other: first loss, then accuracy.
    Each figure overlays the training series and its ``val_`` counterpart
    against the epoch index.

    Args:
        pfile: Path of the log file, passed on to ``parse_log_file``.
    """
    curves = parse_log_file(pfile)
    for metric in ('loss', 'acc'):
        # Training series first, validation second, to match the legend order.
        for series in (metric, 'val_' + metric):
            plt.plot(curves[series])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper left')
        plt.show()

In [None]:
# Target image size used throughout the notebook.
img_width = 150
img_height = 150
# Shape handed to the models; the trailing 3 is presumably the RGB channel
# axis (channels-last) -- confirm against the backend's image data format.
input_shape = (img_width, img_height, 3)

## Train a small convnet on our small data

In [None]:
# define a sequential model (small conv net): 
# 3 conv blocks (Conv2D, Activation('relu'), MaxPooling2D) + 2 dense layers
# Conv_1: filters 32, kernel size(3,3)
# Conv_2: filters 32, kernel size(3,3)
# Conv_3: filters 64, kernel size(3,3)
# Flatten
# Dense_1: 64
# Activation('relu')
# Dropout(0.5)
# Dense_2:  ? 
# Activation('sigmoid')

# print model summary

# # add your implementation

In [None]:
# compile model 
# use binary crossentropy loss 
# and rmsprop optimizer

# # add your implementation

In [None]:
# Image directories for the training and validation splits.
train_data_dir, validation_data_dir = 'data/train', 'data/validation'
# Sample counts for each split (presumably the number of images on disk --
# verify against the actual directory contents).
nb_train_samples, nb_validation_samples = 2000, 800
# Training schedule.
epochs, batch_size = 50, 16

In [None]:
# define a keras ImageDataGenerator for training data with appropriate augmentation 
# use rescale=1. / 255 to normalise pixel values

# define a keras ImageDataGenerator for test data  (no augmentation only rescaling)

# # add your implementation

In [3]:
# define training and validation iterators 
# use ImageDataGenerator.flow_from_directory on the training and validation dirs respectively

# # add your implementation

In [None]:
# train and validate the model using fit_generator

# # add your implementation

## Train an MLP using bottleneck features

In [None]:
# define VGG16 network using keras applications.VGG16  
# set weights = 'imagenet'
# set include_top=False : don't include the fully-connected layer at the top of the network 
# set input_shape: image shape

# # add your implementation

# define a keras ImageDataGenerator for data (no augmentation only rescaling)
# define training and validation iterators same as before (set shuffle=False)

# # add your implementation

# extract image features for training and validation separately using predict_generator

# # add your implementation

# save training and validation features

# # add your implementation

In [4]:
# create training and validation label arrays (features are in order because we used shuffle=False)

# # add your implementation

# define model of two dense layers 256 and ? 
# model should start with Flatten layer (to flatten extracted image features to a vector)
# don’t forget activations and dropout(0.5)
# compile with binary_crossentropy loss and rmsprop optimizer

# # add your implementation

# Train MLP using fit function

# # add your implementation

# save weights to use them in fine-tuning later on

# # add your implementation

## Finetuning top layers of pretrained model on our small data

In [None]:
# define new model:  VGG16 as base and MLP as top 
# input = base_model.input, output=top_model(base_model.output)

# # add your implementation

# freeze the first 15 layers (up to the last conv block)
# set trainable=False (weights will not be updated)

# # add your implementation

# compile with binary_crossentropy loss and an SGD optimizer with a low learning rate

# # add your implementation

In [None]:
# fine-tune the model using fit_generator and train and validation iterators