# Retrain a VGG16 Architecture
* https://keras.io/applications/#vgg16
* https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
* https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/5.3-using-a-pretrained-convnet.ipynb

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
%matplotlib inline
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [3]:
import matplotlib.pylab as plt
import numpy as np

In [4]:
from distutils.version import StrictVersion

In [5]:
import sklearn
print(sklearn.__version__)

assert StrictVersion(sklearn.__version__ ) >= StrictVersion('0.18.1')

0.18.1


In [6]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)

assert StrictVersion(tf.__version__) >= StrictVersion('1.1.0')

1.2.1


In [7]:
import keras
print(keras.__version__)

assert StrictVersion(keras.__version__) >= StrictVersion('2.0.0')

Using TensorFlow backend.


2.0.8


In [8]:
import pandas as pd
print(pd.__version__)

assert StrictVersion(pd.__version__) >= StrictVersion('0.20.0')

0.20.1


## Preparation

In [9]:
# the larger the input, the longer training takes; the input layer size of the VGG network must match this value

INPUT_SHAPE = (64, 64)
# INPUT_SHAPE = (128, 128)
# INPUT_SHAPE = (256, 256)

In [10]:
EPOCHS = 50

In [11]:
# Depends on hardware GPU architecture, set as high as possible (this works well on K80)
BATCH_SIZE = 100

In [12]:
!rm -rf ./tf_log
# https://keras.io/callbacks/#tensorboard
tb_callback = keras.callbacks.TensorBoard(log_dir='./tf_log')
# To start tensorboard
# tensorboard --logdir=./tf_log
# open http://localhost:6006

In [13]:
!ls -lh

total 317M
-rw-rw-r-- 1 ubuntu ubuntu  44K Oct  1 08:04 440px-Beagle_Upsy.jpg
drwxrwxr-x 8 ubuntu ubuntu 4.0K Oct  1 08:10 augmented-signs
-rw-rw-r-- 1 ubuntu ubuntu  17M Oct  1 08:10 augmented-signs.zip
-rw-rw-r-- 1 ubuntu ubuntu 303K Sep 27 15:22 Black_New_York_stuy_town_squirrel_amanda_ernlund.jpeg
-rw-rw-r-- 1 ubuntu ubuntu 844K Oct  1 08:04 cat-bonkers.png
-rw-rw-r-- 1 ubuntu ubuntu 140K Sep 27 15:22 cnn-augmentation.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 1.6M Oct  1 08:04 cnn-comparing-all-models.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 484K Oct  1 09:57 cnn-imagenet-retrain.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 158K Oct  1 08:04 cnn-intro.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 1.3M Oct  1 08:04 cnn-prediction.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 158K Oct  1 08:04 cnn-standard-architectures.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 198K Oct  1 08:04 cnn-train-augmented.ipynb
-rw-rw-r-- 1 ubuntu ubuntu 495K Sep 27 15:22 london.jpg
drwxrwxr-x 3 ubuntu ubuntu 4.0K Sep 27 15:25 __MACOSX
-rw-rw-r-- 1 ubuntu

In [14]:
import os
import skimage.data
import skimage.transform
from keras.utils.np_utils import to_categorical
import numpy as np

def load_data(data_dir, type=".ppm"):
    """Load a labelled image set stored as one sub-directory per class.

    Every sub-directory of ``data_dir`` must be named after its integer class
    index (``int(name)`` is used as the label). All files in it whose name
    ends with ``type`` are read, resized to the notebook-level ``INPUT_SHAPE``
    and stacked into one array.

    Args:
        data_dir: Directory that contains one sub-directory per label.
        type: File name suffix of the images to load, e.g. ``".ppm"``.
            (Shadows the builtin, but the name is kept because callers pass
            it as a keyword argument.)

    Returns:
        Tuple ``(X, y)``: ``X`` is the array of resized images, ``y`` the
        labels passed through ``to_categorical`` with ``num_categories``
        classes.

    Raises:
        ValueError: If a sub-directory name cannot be converted with ``int()``.
    """
    num_categories = 6

    # Each sub-directory of data_dir represents a label. os.listdir() returns
    # entries in arbitrary order, so sort to get the same sample order on
    # every run and machine (the seeded train/test split depends on it).
    directories = sorted(d for d in os.listdir(data_dir)
                         if os.path.isdir(os.path.join(data_dir, d)))

    # Collect the data in two parallel lists, labels and images.
    labels = []
    images = []
    for d in directories:
        label_dir = os.path.join(data_dir, d)
        file_names = sorted(os.path.join(label_dir, f)
                            for f in os.listdir(label_dir) if f.endswith(type))
        # For each label, load its images and resize them right away so the
        # full-size originals do not all have to be kept in memory.
        for f in file_names:
            image = skimage.data.imread(f)
            images.append(skimage.transform.resize(image, INPUT_SHAPE))
            # The label number is the directory name.
            labels.append(int(d))
    y = to_categorical(np.array(labels), num_categories)
    X = np.array(images)
    return X, y

In [15]:
# Load datasets.
ROOT_PATH = "./"
original_dir = os.path.join(ROOT_PATH, "speed-limit-signs")
original_images, original_labels = load_data(original_dir, type=".ppm")

In [16]:
X, y = original_images, original_labels

### Uncomment next three cells if you want to train on augmented image set
#### Otherwise Overfitting can not be avoided because image set is simply too small

In [17]:
# !curl -O https://raw.githubusercontent.com/DJCordhose/speed-limit-signs/master/data/augmented-signs.zip
# from zipfile import ZipFile
# zip = ZipFile('augmented-signs.zip')
# zip.extractall('.')

In [18]:
data_dir = os.path.join(ROOT_PATH, "augmented-signs")
augmented_images, augmented_labels = load_data(data_dir, type=".png")

In [19]:
# merge both data sets
# Stack from original_images/original_labels instead of X/y: X and y are
# re-assigned at the end of this cell, so re-running the cell would otherwise
# stack the augmented set onto the already merged data a second time.
all_images = np.vstack((original_images, augmented_images))
all_labels = np.vstack((original_labels, augmented_labels))

# shuffle
# https://stackoverflow.com/a/4602224
# Use the explicit `np` alias (the bare `numpy` name is only available through
# `%pylab inline`) and a fixed seed so the shuffle is reproducible.
p = np.random.RandomState(42).permutation(len(all_labels))
shuffled_images = all_images[p]
shuffled_labels = all_labels[p]
X, y = shuffled_images, shuffled_labels

### Split test and train data 80% to 20%

In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train.shape, y_train.shape

((3335, 64, 64, 3), (3335, 6))

## First Step: Load VGG pretrained on imagenet and remove classifier
### Hope: Feature Extraction will also work well for Speed Limit Signs

![VGG architecture](https://djcordhose.github.io/ai/img/sketch/vgg-no-classifier.png)

### Imagenet
* Collection of labelled images from many categories
* http://image-net.org/

http://image-net.org/about-stats

<table class="table-stats" style="width: 500px">
<tbody><tr>
<td width="25%"><b>High level category</b></td>
<td width="20%"><b># synset (subcategories)</b></td>
<td width="30%"><b>Avg # images per synset</b></td>
<td width="25%"><b>Total # images</b></td>
</tr>

<tr><td>amphibian</td><td>94</td><td>591</td><td>56K</td></tr>

<tr><td>animal</td><td>3822</td><td>732</td><td>2799K</td></tr>

<tr><td>appliance</td><td>51</td><td>1164</td><td>59K</td></tr>

<tr><td>bird</td><td>856</td><td>949</td><td>812K</td></tr>

<tr><td>covering</td><td>946</td><td>819</td><td>774K</td></tr>

<tr><td>device</td><td>2385</td><td>675</td><td>1610K</td></tr>

<tr><td>fabric</td><td>262</td><td>690</td><td>181K</td></tr>

<tr><td>fish</td><td>566</td><td>494</td><td>280K</td></tr>

<tr><td>flower</td><td>462</td><td>735</td><td>339K</td></tr>

<tr><td>food</td><td>1495</td><td>670</td><td>1001K</td></tr>

<tr><td>fruit</td><td>309</td><td>607</td><td>188K</td></tr>

<tr><td>fungus</td><td>303</td><td>453</td><td>137K</td></tr>

<tr><td>furniture</td><td>187</td><td>1043</td><td>195K</td></tr>

<tr><td>geological formation</td><td>151</td><td>838</td><td>127K</td></tr>

<tr><td>invertebrate</td><td>728</td><td>573</td><td>417K</td></tr>

<tr><td>mammal</td><td>1138</td><td>821</td><td>934K</td></tr>

<tr><td>musical instrument</td><td>157</td><td>891</td><td>140K</td></tr>


<tr><td>plant</td><td>1666</td><td>600</td><td>999K</td></tr>

<tr><td>reptile</td><td>268</td><td>707</td><td>190K</td></tr>

<tr><td>sport</td><td>166</td><td>1207</td><td>200K</td></tr>

<tr><td>structure</td><td>1239</td><td>763</td><td>946K</td></tr>

<tr><td>tool</td><td>316</td><td>551</td><td>174K</td></tr>

<tr><td>tree</td><td>993</td><td>568</td><td>564K</td></tr>

<tr><td>utensil</td><td>86</td><td>912</td><td>78K</td></tr>

<tr><td>vegetable</td><td>176</td><td>764</td><td>135K</td></tr>

<tr><td>vehicle</td><td>481</td><td>778</td><td>374K</td></tr>

<tr><td>person</td><td>2035</td><td>468</td><td>952K</td></tr>

</tbody></table>

### Might be more suitable for cats and dogs, but is the best we have right now

In [21]:
from keras import applications
# Derive the network input from INPUT_SHAPE (height, width) plus 3 colour channels,
# so it always matches the size the images were resized to when loading the data.
vgg_model = applications.VGG16(include_top=False, weights='imagenet', input_shape=INPUT_SHAPE + (3,))

### All Convolutional Blocks are kept fully trained, we just removed the classifier part

In [22]:
vgg_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 16, 16, 128)       0         
__________

### Next step is to push all our signs through the net just once and record the output of bottleneck features
#### Don't get confused: this is no training, yet, this just is recording the prediction in order not to repeat this expensive step over and over again when we train the classifier later

In [23]:
# will take a while, but not really long depending on size and number of input images

%time bottleneck_features_train = vgg_model.predict(X_train)

CPU times: user 7.03 s, sys: 1.34 s, total: 8.36 s
Wall time: 7.86 s


In [24]:
bottleneck_features_train.shape

(3335, 2, 2, 512)

## What does this mean?
* 303 predictions for 303 images or 3335 predictions for 3335 images when using augmented data set
* 512 bottleneck features per prediction
* each bottleneck feature has a size of 2x2, just a blob more or less
* bottleneck feature has larger size when we increase size of input images (might be a good idea)
  * 4x4 when using 128x128 as input
  * 8x8 when using 256x256 as input

In [25]:
first_bottleneck_feature = bottleneck_features_train[0,:,:, 0]

In [26]:
first_bottleneck_feature

array([[ 0.        ,  0.        ],
       [ 1.00562787,  0.80484837]], dtype=float32)

## Now we create a new classifier and train it with this output and the labels from ground truth
### Classifier is copied from our first VGG style network

In [27]:
input_shape = bottleneck_features_train.shape[1:]

In [28]:
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten, Input

# Classifier head that is trained on the recorded bottleneck features:
# Flatten -> Dropout -> Dense(256, relu) -> Dropout -> Dense(6, softmax)

# dropout rate shared by both Dropout layers; try and vary between .4 and .75
drop_out = 0.50

# input_shape is the shape of the bottleneck features without the sample axis
inputs = Input(shape=input_shape)

x = Flatten()(inputs)

# this is an additional dropout to compensate for the missing one after bottleneck features
x = Dropout(drop_out)(x)

x = Dense(256, activation='relu')(x)
x = Dropout(drop_out)(x)

# softmax activation, 6 categories
predictions = Dense(6, activation='softmax')(x)

In [29]:
# Keras 2 names these keyword arguments `inputs`/`outputs`; the singular
# `input`/`output` spelling is the deprecated Keras 1 form.
classifier_model = Model(inputs=inputs, outputs=predictions)
classifier_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 2, 2, 512)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               524544    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 6)                 1542      
Total params: 526,086
Trainable params: 526,086
Non-trainable params: 0
_________________________________________________________________


In [30]:
classifier_model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [31]:
!rm -rf tf_log
# https://keras.io/callbacks/#tensorboard
tb_callback = keras.callbacks.TensorBoard(log_dir='./tf_log')
# To start tensorboard (point --logdir at the log_dir used above)
# tensorboard --logdir=./tf_log
# open http://localhost:6006

## This is a very simple architecture and should train pretty fast
* it overfits by quite a bit

In [32]:
%time history = classifier_model.fit(bottleneck_features_train, y_train, epochs=500, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[tb_callback])
# more epochs might be needed for original data
# %time history = classifier_model.fit(bottleneck_features_train, y_train, epochs=2000, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[tb_callback])

Train on 2668 samples, validate on 667 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500


Epoch 130/500
Epoch 131/500
Epoch 132/500
Epoch 133/500
Epoch 134/500
Epoch 135/500
Epoch 136/500
Epoch 137/500
Epoch 138/500
Epoch 139/500
Epoch 140/500
Epoch 141/500
Epoch 142/500
Epoch 143/500
Epoch 144/500
Epoch 145/500
Epoch 146/500
Epoch 147/500
Epoch 148/500
Epoch 149/500
Epoch 150/500
Epoch 151/500
Epoch 152/500
Epoch 153/500
Epoch 154/500
Epoch 155/500
Epoch 156/500
Epoch 157/500
Epoch 158/500
Epoch 159/500
Epoch 160/500
Epoch 161/500
Epoch 162/500
Epoch 163/500
Epoch 164/500
Epoch 165/500
Epoch 166/500
Epoch 167/500
Epoch 168/500
Epoch 169/500
Epoch 170/500
Epoch 171/500
Epoch 172/500
Epoch 173/500
Epoch 174/500
Epoch 175/500
Epoch 176/500
Epoch 177/500
Epoch 178/500
Epoch 179/500
Epoch 180/500
Epoch 181/500
Epoch 182/500
Epoch 183/500
Epoch 184/500
Epoch 185/500
Epoch 186/500
Epoch 187/500
Epoch 188/500
Epoch 189/500
Epoch 190/500
Epoch 191/500
Epoch 192/500
Epoch 193/500
Epoch 194/500
Epoch 195/500
Epoch 196/500
Epoch 197/500
Epoch 198/500
Epoch 199/500
Epoch 200/500
Epoch 

Epoch 258/500
Epoch 259/500
Epoch 260/500
Epoch 261/500
Epoch 262/500
Epoch 263/500
Epoch 264/500
Epoch 265/500
Epoch 266/500
Epoch 267/500
Epoch 268/500
Epoch 269/500
Epoch 270/500
Epoch 271/500
Epoch 272/500
Epoch 273/500
Epoch 274/500
Epoch 275/500
Epoch 276/500
Epoch 277/500
Epoch 278/500
Epoch 279/500
Epoch 280/500
Epoch 281/500
Epoch 282/500
Epoch 283/500
Epoch 284/500
Epoch 285/500
Epoch 286/500
Epoch 287/500
Epoch 288/500
Epoch 289/500
Epoch 290/500
Epoch 291/500
Epoch 292/500
Epoch 293/500
Epoch 294/500
Epoch 295/500
Epoch 296/500
Epoch 297/500
Epoch 298/500
Epoch 299/500
Epoch 300/500
Epoch 301/500
Epoch 302/500
Epoch 303/500
Epoch 304/500
Epoch 305/500
Epoch 306/500
Epoch 307/500
Epoch 308/500
Epoch 309/500
Epoch 310/500
Epoch 311/500
Epoch 312/500
Epoch 313/500
Epoch 314/500
Epoch 315/500
Epoch 316/500
Epoch 317/500
Epoch 318/500
Epoch 319/500
Epoch 320/500
Epoch 321/500
Epoch 322/500
Epoch 323/500
Epoch 324/500
Epoch 325/500
Epoch 326/500
Epoch 327/500
Epoch 328/500
Epoch 

Epoch 386/500
Epoch 387/500
Epoch 388/500
Epoch 389/500
Epoch 390/500
Epoch 391/500
Epoch 392/500
Epoch 393/500
Epoch 394/500
Epoch 395/500
Epoch 396/500
Epoch 397/500
Epoch 398/500
Epoch 399/500
Epoch 400/500
Epoch 401/500
Epoch 402/500
Epoch 403/500
Epoch 404/500
Epoch 405/500
Epoch 406/500
Epoch 407/500
Epoch 408/500
Epoch 409/500
Epoch 410/500
Epoch 411/500
Epoch 412/500
Epoch 413/500
Epoch 414/500
Epoch 415/500
Epoch 416/500
Epoch 417/500
Epoch 418/500
Epoch 419/500
Epoch 420/500
Epoch 421/500
Epoch 422/500
Epoch 423/500
Epoch 424/500
Epoch 425/500
Epoch 426/500
Epoch 427/500
Epoch 428/500
Epoch 429/500
Epoch 430/500
Epoch 431/500
Epoch 432/500
Epoch 433/500
Epoch 434/500
Epoch 435/500
Epoch 436/500
Epoch 437/500
Epoch 438/500
Epoch 439/500
Epoch 440/500
Epoch 441/500
Epoch 442/500
Epoch 443/500
Epoch 444/500
Epoch 445/500
Epoch 446/500
Epoch 447/500
Epoch 448/500
Epoch 449/500
Epoch 450/500
Epoch 451/500
Epoch 452/500
Epoch 453/500
Epoch 454/500
Epoch 455/500
Epoch 456/500
Epoch 

## Issue 1: We have two separate models now
* How do we evaluate?
* How to save model for later prediction use / deployment?

In [33]:
from keras import models

# Chain the VGG convolutional base and the classifier into one end-to-end model
# by handing both to the constructor as an ordered list.
combined_model = models.Sequential([vgg_model, classifier_model])

In [34]:
combined_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 2, 2, 512)         14714688  
_________________________________________________________________
model_1 (Model)              (None, 6)                 526086    
Total params: 15,240,774
Trainable params: 15,240,774
Non-trainable params: 0
_________________________________________________________________


In [35]:
combined_model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [36]:
train_loss, train_accuracy = combined_model.evaluate(X_train, y_train, batch_size=BATCH_SIZE)
train_loss, train_accuracy



(0.11308885638558792, 0.96731634484953077)

In [37]:
test_loss, test_accuracy = combined_model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
test_loss, test_accuracy



(0.49626786088486086, 0.85731415182566473)

In [38]:
# complete original non augmented speed limit signs
original_loss, original_accuracy = combined_model.evaluate(original_images, original_labels, batch_size=BATCH_SIZE)
original_loss, original_accuracy



(0.50608633509726508, 0.85488126774891071)

In [39]:
# combined_model.save('vgg16-retrained.hdf5')
combined_model.save('vgg16-augmented-retrained.hdf5')

In [40]:
# !ls -lh vgg16-retrained.hdf5
!ls -lh vgg16-augmented-retrained.hdf5

-rw-rw-r-- 1 ubuntu ubuntu 59M Oct  1 10:00 vgg16-augmented-retrained.hdf5


## Issue 2: Whatever we do, we overfit, much more than 85% on test not possible
* for non augmented data it might even be as low as 70%
* first thing we could try: maybe the bottleneck features being 2x2 is too small, we could compensate by scaling images up to 128x128 or even 256x256
  * this can indeed bring up test score to 90%
  * however, this will make the model incompatible with the 64x64 input of the other models and make deployment harder, so we keep 64x64
* maybe feature extracting from Imagenet is too different from what we have with speed limit signs? 
* or is the classifier too simple for the complex features?

## Let us try some fine tuning

### First we freeze all but the last convolutional block

In [41]:
len(vgg_model.layers)

19

In [42]:
vgg_model.layers

[<keras.engine.topology.InputLayer at 0x7f85b65566d8>,
 <keras.layers.convolutional.Conv2D at 0x7f85b6556a58>,
 <keras.layers.convolutional.Conv2D at 0x7f85b6556c18>,
 <keras.layers.pooling.MaxPooling2D at 0x7f8568aaef98>,
 <keras.layers.convolutional.Conv2D at 0x7f85551a2be0>,
 <keras.layers.convolutional.Conv2D at 0x7f85551b6e80>,
 <keras.layers.pooling.MaxPooling2D at 0x7f85551c8518>,
 <keras.layers.convolutional.Conv2D at 0x7f85551717f0>,
 <keras.layers.convolutional.Conv2D at 0x7f8555171048>,
 <keras.layers.convolutional.Conv2D at 0x7f8555196668>,
 <keras.layers.pooling.MaxPooling2D at 0x7f8555141cf8>,
 <keras.layers.convolutional.Conv2D at 0x7f85550ea240>,
 <keras.layers.convolutional.Conv2D at 0x7f85550ead68>,
 <keras.layers.convolutional.Conv2D at 0x7f8555111630>,
 <keras.layers.pooling.MaxPooling2D at 0x7f85550a6320>,
 <keras.layers.convolutional.Conv2D at 0x7f85550cf940>,
 <keras.layers.convolutional.Conv2D at 0x7f85550cf240>,
 <keras.layers.convolutional.Conv2D at 0x7f855507

In [43]:
first_conv_layer = vgg_model.layers[1]

In [44]:
first_conv_layer.trainable

True

In [45]:
# set the first 15 layers (up to the last conv block)
# to non-trainable (weights will not be updated)
# so, the general features are kept and we (hopefully) do not have overfitting
non_trainable_layers = vgg_model.layers[:15]

In [46]:
non_trainable_layers

[<keras.engine.topology.InputLayer at 0x7f85b65566d8>,
 <keras.layers.convolutional.Conv2D at 0x7f85b6556a58>,
 <keras.layers.convolutional.Conv2D at 0x7f85b6556c18>,
 <keras.layers.pooling.MaxPooling2D at 0x7f8568aaef98>,
 <keras.layers.convolutional.Conv2D at 0x7f85551a2be0>,
 <keras.layers.convolutional.Conv2D at 0x7f85551b6e80>,
 <keras.layers.pooling.MaxPooling2D at 0x7f85551c8518>,
 <keras.layers.convolutional.Conv2D at 0x7f85551717f0>,
 <keras.layers.convolutional.Conv2D at 0x7f8555171048>,
 <keras.layers.convolutional.Conv2D at 0x7f8555196668>,
 <keras.layers.pooling.MaxPooling2D at 0x7f8555141cf8>,
 <keras.layers.convolutional.Conv2D at 0x7f85550ea240>,
 <keras.layers.convolutional.Conv2D at 0x7f85550ead68>,
 <keras.layers.convolutional.Conv2D at 0x7f8555111630>,
 <keras.layers.pooling.MaxPooling2D at 0x7f85550a6320>]

In [47]:
for layer in non_trainable_layers:
    layer.trainable = False

In [48]:
first_conv_layer.trainable

False

### We then tweak the complete model by very slowly re-retraining classifier and final convolutional block
* slow learning prevents us from ruining previous good results
* leave everything else in place
    * earlier layers hopefully already encode common feature channels
    * less risk of overfitting
      * earlier layers are more general
      * model has too much capacity for training and is likely to learn each and every detail
    * a little bit faster

#### This may still take quite a while

In [49]:
from keras import optimizers

# compile the model with a SGD/momentum optimizer
# and a very slow learning rate
# make updates very small and non adaptive so we do not ruin previous learnings 
# NOTE: compiling again is also what makes the `trainable = False` flags set on the
# VGG layers in the cells above take effect for the following fit() call
combined_model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

In [50]:
!rm -r tf_log

In [51]:
%time combined_model.fit(X_train, y_train, epochs=150, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[tb_callback])
# non augmented data is cheap to retrain, so we can try a few more epochs
# %time combined_model.fit(X_train, y_train, epochs=1000, batch_size=BATCH_SIZE, validation_split=0.2, callbacks=[tb_callback])

Train on 2668 samples, validate on 667 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150


Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150
CPU times: user 3min 44s, sys: 34.4 s, total: 4min 18s
Wall time: 17min 25s


<keras.callbacks.History at 0x7f8518cb8da0>

## 90% for validation is quite a bit of improvement, might even increase when we train for a bit longer

## Metrics for Augmented Data

### Accuracy
![Accuracy Fine Tuning](https://djcordhose.github.io/ai/img/tensorboard/cnn-acc-fine-tuning.png)
### Validation Accuracy
![Validation Accuracy Fine Tuning](https://djcordhose.github.io/ai/img/tensorboard/cnn-val-acc-fine-tuning.png)

In [52]:
train_loss, train_accuracy = combined_model.evaluate(X_train, y_train, batch_size=BATCH_SIZE)
train_loss, train_accuracy



(0.10222922369810358, 0.98020990570445821)

In [53]:
test_loss, test_accuracy = combined_model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
test_loss, test_accuracy



(0.42116924984563742, 0.89928059569365681)

In [54]:
# complete original non augmented speed limit signs
original_loss, original_accuracy = combined_model.evaluate(original_images, original_labels, batch_size=BATCH_SIZE)
original_loss, original_accuracy



(0.45832276033694636, 0.90501321683143876)

In [55]:
combined_model.save('vgg16-augmented-retrained-fine-tuned.hdf5')
# combined_model.save('vgg16-retrained-fine-tuned.hdf5')

In [56]:
# !ls -lh vgg16-retrained-fine-tuned.hdf5
!ls -lh vgg16-augmented-retrained-fine-tuned.hdf5

-rw-rw-r-- 1 ubuntu ubuntu 88M Oct  1 10:22 vgg16-augmented-retrained-fine-tuned.hdf5


---

# Hands-On: Experiment with all parameters
  
---