## Training the model

In [None]:
# import system libraries
import os, sys
# base directory for every data/results path built below:
# the current working directory at the time this cell runs
path = os.getcwd()

In [None]:
# import utility libraries
from utils import *
from vgg16 import Vgg16

%matplotlib inline

In [None]:
# set path
# Every directory path ends with '/' so that a file name can be appended
# directly (e.g. test_path + filenames[i]); real_time_path follows the same
# convention as the other directories.
train_path = path + '/train/'
valid_path = path + '/validation/'
test_path = path + '/test/'
results_path = path + '/results/'
real_time_path = path + '/realtime/'

In [None]:
# instantiate VGG16, weights, models etc
# (Vgg16 is the project-local class imported from vgg16; this single object
# is reused by every training / testing cell below)
vgg = Vgg16()

In [None]:
# set hyperparameters
# batch_size   -> passed as batch_size to vgg.get_batches / vgg.test
# no_of_epochs -> number of iterations of the training loop
batch_size, no_of_epochs = 32, 15

In [None]:
# finetune the model
# build the batch sources in the same order as before: training first,
# then validation, both with the shared batch_size
batches, val_batches = (
    vgg.get_batches(data_dir, batch_size=batch_size)
    for data_dir in (train_path, valid_path)
)

# hand the training batches to finetune(), then assign the learning rate
# on the model's optimizer
vgg.finetune(batches)
vgg.model.optimizer.lr = 0.01

In [None]:
# run the loop
# Make sure the output directory exists before training starts; a missing
# directory would otherwise only surface at the first save_weights call,
# after a full epoch of training has already been spent.
if not os.path.isdir(results_path):
    os.makedirs(results_path)

# name of the most recently written weights file; stays None if no epoch runs
latest_weights_filename = None

for epoch in range(no_of_epochs):
    print ("Running epoch: %d" % epoch)
    # one fit pass per loop iteration so weights can be saved after each one
    vgg.fit(batches, val_batches, nb_epoch=1)
    # one weights file per epoch: ft0.h5, ft1.h5, ...
    latest_weights_filename = 'ft%d.h5' % epoch
    vgg.model.save_weights(results_path+latest_weights_filename)

print ("Completed %s fit operations" % no_of_epochs)

In [None]:
# test the model
# NOTE: this rebinds `batches`, which until now held the training batches;
# re-run the finetune cell before training again.
batches, preds = vgg.test(test_path, batch_size = batch_size)

In [None]:
# show the first five predictions returned by vgg.test
print (preds[:5])

# file names reported by the test batches
# (presumably in the same order as preds -- TODO confirm)
filenames = batches.filenames
print (filenames[:5])

In [None]:
# verify some images
from PIL import Image
# open the third test image; this must stay the last expression of the cell
# so that the notebook renders the returned image
Image.open(test_path + filenames[2])

In [None]:
# save test results
# save_array is not defined in this notebook (presumably it comes from the
# utils star-import); the predictions and their file names are written to
# two separate files under results_path
save_array(results_path + 'test_preds.dat', preds)
save_array(results_path + 'filenames.dat', filenames)

## Validate results

In [None]:
# reload the weights file written by the last iteration of the training loop
# (latest_weights_filename is None if that loop has not run, in which case
# the string concatenation below fails)
vgg.model.load_weights(results_path + latest_weights_filename)

In [None]:
# run the model over the validation directory; this rebinds val_batches to
# the batches object returned by vgg.test and stores the predictions in probs
val_batches, probs = vgg.test(valid_path, batch_size = batch_size)

In [None]:
# column 0 of the validation predictions
our_predictions = probs[:, 0]

# turn that column into 0/1 labels: a value above 0.5 becomes label 0,
# a value below 0.5 becomes label 1
our_labels = np.round(1 - our_predictions)

# file names and expected labels as reported by the validation batches
filenames = val_batches.filenames
expected_labels = val_batches.classes  # 0 or 1

In [None]:
from keras.preprocessing import image


def plots_idx(idx, titles=None):
    """Plot the validation images selected by the indices in idx.

    idx    -- iterable of positions into the global `filenames` list
    titles -- optional titles, forwarded unchanged to plots()

    Each image is loaded from valid_path + filenames[i]; plots is a helper
    function in utils.py.
    """
    selected_images = [image.load_img(valid_path + filenames[i]) for i in idx]
    plots(selected_images, titles=titles)


# how many images each visualization cell below displays
n_view = 4

In [None]:
# 1. few correct labels at random
# positions where our label agrees with the expected label
correct = np.nonzero(our_labels == expected_labels)[0]
print ("Found %d correct labels" % correct.shape[0])

# shuffle those positions and show the first n_view, titled with their
# column-0 prediction value
idx = permutation(correct)[:n_view]
plots_idx(idx, titles=our_predictions[idx])

In [None]:
# 2. few incorrect labels at random
# positions where our label differs from the expected label
incorrect = np.nonzero(our_labels != expected_labels)[0]
print ("Found %d incorrect labels" % incorrect.shape[0])

# shuffle those positions and show the first n_view, titled with their
# column-0 prediction value
idx = permutation(incorrect)[:n_view]
plots_idx(idx, titles=our_predictions[idx])

In [None]:
from sklearn.metrics import confusion_matrix
# confusion matrix of the validation set: expected labels are passed first
# (y_true), our rounded predictions second (y_pred)
cm = confusion_matrix(expected_labels, our_labels)

In [None]:
# draw the confusion matrix, passing the batches' class_indices as the classes
# NOTE(review): class_indices is passed as-is; confirm that
# plot_confusion_matrix labels the axes in class-index order
plot_confusion_matrix(cm, val_batches.class_indices)

## Realtime test

In [None]:
# run the model on the images under real_time_path
# NOTE: this rebinds `batches` and `preds`, replacing the earlier test-set results
batches, preds = vgg.test(real_time_path, batch_size = batch_size)