In [1]:
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

Using TensorFlow backend.


In [2]:
# Load VGG16 with its ImageNet-pretrained weights. Every benchmark below builds
# on (and shares layers with) this one base model instance.
base_vgg16 = VGG16(weights='imagenet')

# Benchmark 1: VGG16 with replaced last layer

In [3]:
from keras.models import Model
from keras.layers import Dense

Replace the layer

In [4]:
# Branch off the output of VGG16's `fc2` layer and put a fresh 22-way softmax
# classifier on top of it; the resulting model reuses VGG16's input tensor.
fc2_features = base_vgg16.get_layer('fc2').output
new_classifier = Dense(22, activation='softmax')
final_layer = new_classifier(fc2_features)
benchmark_last_layer = Model(base_vgg16.input, final_layer)

In [5]:
# Print the layer-by-layer architecture to confirm the new head is attached.
benchmark_last_layer.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

Make all other layers not trainable

In [6]:
# Freeze every layer except the last one (the newly added classifier).
# NOTE: these layer objects come from `base_vgg16`, so the flag is changed on
# the shared layers themselves, not on copies.
layers_to_freeze = benchmark_last_layer.layers[:-1]
for frozen_layer in layers_to_freeze:
    frozen_layer.trainable = False

In [7]:
from keras.metrics import top_k_categorical_accuracy
def top_1_error(y_true, y_pred):
    """Top-1 error metric: one minus the top-1 categorical accuracy.

    Args:
        y_true: ground-truth labels, passed straight to
            `top_k_categorical_accuracy`.
        y_pred: model predictions, passed straight to
            `top_k_categorical_accuracy`.

    Returns:
        `1 - top_k_categorical_accuracy(y_true, y_pred, k=1)`.
    """
    top_1_accuracy = top_k_categorical_accuracy(y_true, y_pred, k=1)
    return 1 - top_1_accuracy

def top_3_error(y_true, y_pred):
    """Top-3 error metric: one minus the top-3 categorical accuracy.

    Args:
        y_true: ground-truth labels, passed straight to
            `top_k_categorical_accuracy`.
        y_pred: model predictions, passed straight to
            `top_k_categorical_accuracy`.

    Returns:
        `1 - top_k_categorical_accuracy(y_true, y_pred, k=3)`.
    """
    top_3_accuracy = top_k_categorical_accuracy(y_true, y_pred, k=3)
    return 1 - top_3_accuracy

In [8]:
# Compile after freezing so the trainable flags set above are in effect.
# Both error rates are tracked next to the cross-entropy loss.
error_metrics = [top_1_error, top_3_error]
benchmark_last_layer.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=error_metrics,
)

Data generators for preprocessed data

In [9]:
def preprocess_img(img):
    """Apply VGG16's `preprocess_input` to a single image.

    Used as the `preprocessing_function` of the `ImageDataGenerator`s below.
    Keras documents that hook as receiving one rank-3 image and returning an
    array of the same shape, so the temporary batch axis is removed again
    before returning instead of leaking a `(1, H, W, C)` array to the
    generator.

    Args:
        img: one image as a rank-3 NumPy array.

    Returns:
        The preprocessed image with the same rank-3 shape as `img`.
    """
    # A leading batch axis is added before calling `preprocess_input` (kept
    # from the original code; presumably required by the Keras version in use).
    batch_of_one = np.expand_dims(img, axis=0)
    # Drop the batch axis so the result matches the input shape.
    return preprocess_input(batch_of_one)[0]

# Training-time generator: VGG16 preprocessing plus random horizontal flips
# as the only augmentation.
datagen_train = ImageDataGenerator(
    horizontal_flip=True,
    preprocessing_function=preprocess_img,
)

def generator_train(batch_size):
    """Create a fresh iterator over the training image directory.

    Args:
        batch_size: number of images per yielded batch.

    Returns:
        The iterator returned by `datagen_train.flow_from_directory` for
        './data/images/train', with images resized to 224x224 and
        categorical (one-hot) labels.
    """
    flow_options = dict(
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
    )
    return datagen_train.flow_from_directory('./data/images/train', **flow_options)

# Validation-time generator: same preprocessing as training, no augmentation.
datagen_val = ImageDataGenerator(preprocessing_function=preprocess_img)

def generator_val(batch_size, shuffle=True):
    """Create a fresh iterator over the validation image directory.

    Args:
        batch_size: number of images per yielded batch.
        shuffle: forwarded to `flow_from_directory`; the evaluation cells
            pass False.

    Returns:
        The iterator returned by `datagen_val.flow_from_directory` for
        './data/images/val', with images resized to 224x224 and
        categorical (one-hot) labels.
    """
    flow_options = dict(
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=shuffle,
    )
    return datagen_val.flow_from_directory('./data/images/val', **flow_options)

In [10]:
import pandas as pd
# Per-image statistics table; its 'sample' column marks the split each image
# belongs to. The counts are used below to size the training/validation steps.
image_level_statistics = pd.read_csv(
    './results/data_analysis/image_level_statistics.csv')
is_train_image = image_level_statistics['sample'] == 'train'
is_val_image = image_level_statistics['sample'] == 'val'
n_train_images = is_train_image.sum()
n_val_images = is_val_image.sum()

Some callbacks. We save the model with the lowest validation loss, stop if there was no decrease in 20 epochs, and track the history of losses and errors.

In [11]:
from keras.callbacks import ModelCheckpoint, EarlyStopping, LambdaCallback
# Keep only the best model seen so far on disk (`save_best_only=True`).
checkpointer = ModelCheckpoint(
    filepath='./saved_models/benchmark/weights_vgg16_top_only.hdf5',
    save_best_only=True,
    verbose=1,
)
# Stop training once `val_loss` has not improved for 20 consecutive epochs.
# This instance is reused by every benchmark below.
stopper = EarlyStopping(
    monitor='val_loss',
    patience=20,
    min_delta=0,
    mode='auto',
    verbose=1,
)

In [12]:
# One dict of losses/errors per finished epoch is appended here during training.
history_last_layer = []
def get_history(history_dict):
    """Build a callback that records per-epoch losses and error rates.

    Args:
        history_dict: despite the name, a list-like object with `append`;
            one dict per epoch is appended to it.

    Returns:
        A `LambdaCallback` whose `on_epoch_end` hook appends a dict with the
        training and validation loss, top-1 error and top-3 error taken from
        the epoch's `logs`.
    """
    tracked_keys = ('loss', 'top_1_error', 'top_3_error',
                    'val_loss', 'val_top_1_error', 'val_top_3_error')

    def record_epoch(epoch, logs):
        # Copy only the tracked entries, in a fixed order; a missing key
        # raises KeyError.
        return history_dict.append({key: logs[key] for key in tracked_keys})

    return LambdaCallback(on_epoch_end=record_epoch)

In [13]:
# Upper bound on training epochs for every benchmark; the early-stopping
# callback may end a run sooner.
epochs = 100

In [14]:
# Batch size for Benchmark 1 (reassigned before each later benchmark).
batch_size = 32

In [15]:
# Each "epoch" covers roughly one fifth of the images, hence the factor 5.
# Keras documents `steps_per_epoch` / `validation_steps` as integers, so the
# fractional batch counts are rounded up explicitly rather than passed as floats.
steps_per_epoch = int(np.ceil(n_train_images / (5 * batch_size)))
validation_steps = int(np.ceil(n_val_images / (5 * batch_size)))

# Train only the new classifier; the checkpoint keeps the best model, the
# stopper ends training early, and the history callback records each epoch.
benchmark_last_layer.fit_generator(
    generator_train(batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=[checkpointer, stopper, get_history(history_last_layer)],
    validation_data=generator_val(batch_size),
    validation_steps=validation_steps,
    verbose=2)

Found 12990 images belonging to 22 classes.
Found 4319 images belonging to 22 classes.
Epoch 1/100
Epoch 00000: val_loss improved from inf to 2.62567, saving model to ./saved_models/benchmark/weights_vgg16_top_only.hdf5
34s - loss: 2.6499 - top_1_error: 0.6589 - top_3_error: 0.3960 - val_loss: 2.6257 - val_top_1_error: 0.6285 - val_top_3_error: 0.3391
Epoch 2/100
Epoch 00001: val_loss improved from 2.62567 to 2.13961, saving model to ./saved_models/benchmark/weights_vgg16_top_only.hdf5
32s - loss: 2.2188 - top_1_error: 0.5667 - top_3_error: 0.3034 - val_loss: 2.1396 - val_top_1_error: 0.5382 - val_top_3_error: 0.2755
Epoch 3/100
Epoch 00002: val_loss improved from 2.13961 to 2.07717, saving model to ./saved_models/benchmark/weights_vgg16_top_only.hdf5
32s - loss: 2.1111 - top_1_error: 0.5541 - top_3_error: 0.2843 - val_loss: 2.0772 - val_top_1_error: 0.5625 - val_top_3_error: 0.2419
Epoch 4/100
Epoch 00003: val_loss improved from 2.07717 to 1.95054, saving model to ./saved_models/bench

<keras.callbacks.History at 0x7fa613536470>

In [16]:
# Restore the checkpoint written by ModelCheckpoint before scoring.
benchmark_last_layer.load_weights('./saved_models/benchmark/weights_vgg16_top_only.hdf5')
# `steps` must be a whole number of batches; rounding up makes the final,
# partially filled batch part of the evaluation so every validation image is
# scored.
evaluation_steps = int(np.ceil(n_val_images / batch_size))
performance_benchmark_last_layer = benchmark_last_layer.evaluate_generator(
    generator_val(batch_size, shuffle=False), steps=evaluation_steps)
# The result is indexed as [loss, top-1 error, top-3 error].
# NOTE: '{:2f}' means minimum width 2 with the default six decimals (not '.2f').
print('Validation loss: {:2f}'.format(performance_benchmark_last_layer[0]))
print('Validation top-1-error rate: {:.2f}%'.format(100*performance_benchmark_last_layer[1]))
print('Validation top-3-error rate: {:.2f}%'.format(100*performance_benchmark_last_layer[2]))

Found 4319 images belonging to 22 classes.
Validation loss: 2.020255
Validation top-1-error rate: 51.45%
Validation top-3-error rate: 23.76%


Save the results

In [17]:
# Persist the per-epoch history (one row per epoch) for later analysis.
history_last_layer_frame = pd.DataFrame(history_last_layer)
history_last_layer_frame.to_csv('./results/benchmark/history_last_layer.csv')

# Benchmark 2: VGG16 with replaced dense layers
Looking at the losses from Benchmark 1, the model is probably not flexible enough but is still quickly over-fitting. Let's try to change things up. We exchange the flatten layer for a global average pooling layer and add our own dense layers after that. (We cannot directly re-use the original dense layers since the first Dense layer is just too big for our computer.) We add dropout. The new dense layers are initialized with random weights. 

In [18]:
from keras.layers import Dropout, GlobalAveragePooling2D

In [19]:
# New classification head on top of the last convolutional block:
# global average pooling -> two 2048-unit ReLU layers with 30% dropout each
# -> 22-way softmax. The final tensor stays bound to `dense` because the model
# is built from that name in the next cell.
conv_features = base_vgg16.get_layer('block5_pool').output
pooled_features = GlobalAveragePooling2D()(conv_features)
hidden_1 = Dense(2048, activation='relu', name='fc1')(pooled_features)
hidden_1 = Dropout(0.3)(hidden_1)
hidden_2 = Dense(2048, activation='relu', name='fc2')(hidden_1)
hidden_2 = Dropout(0.3)(hidden_2)
dense = Dense(22, activation='softmax', name='prediction')(hidden_2)

In [20]:
# Build the Benchmark 2 model from VGG16's input to the new prediction tensor;
# it reuses the convolutional layers of `base_vgg16`.
benchmark_all_dense_layers = Model(base_vgg16.input, dense)

In [21]:
# Print the layer-by-layer architecture of the Benchmark 2 model.
benchmark_all_dense_layers.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [22]:
# Sanity check: list each layer's name together with its `trainable` flag.
print('Which layers are trainable?')
for current_layer in benchmark_all_dense_layers.layers:
    status_line = '{}: {}'.format(current_layer.name, current_layer.trainable)
    print(status_line)

Which layers are trainable?
input_1: False
block1_conv1: False
block1_conv2: False
block1_pool: False
block2_conv1: False
block2_conv2: False
block2_pool: False
block3_conv1: False
block3_conv2: False
block3_conv3: False
block3_pool: False
block4_conv1: False
block4_conv2: False
block4_conv3: False
block4_pool: False
block5_conv1: False
block5_conv2: False
block5_conv3: False
block5_pool: False
global_average_pooling2d_1: True
fc1: True
dropout_1: True
fc2: True
dropout_2: True
prediction: True


In [23]:
# Same loss, optimizer and metrics as Benchmark 1, for comparable numbers.
error_metrics = [top_1_error, top_3_error]
benchmark_all_dense_layers.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=error_metrics,
)

In [24]:
# Rebind `checkpointer` so Benchmark 2 saves its best model to its own file.
checkpointer = ModelCheckpoint(
    filepath='./saved_models/benchmark/weights_vgg16_dense.hdf5',
    save_best_only=True,
    verbose=1,
)

In [25]:
# Per-epoch history for Benchmark 2, filled by the history callback.
history_all_dense_layers = []

In [26]:
# Batch size for Benchmark 2 (smaller than Benchmark 1's 32 — presumably to
# fit the larger trainable head in memory; confirm).
batch_size=25

In [27]:
# Each "epoch" covers roughly one fifth of the images, hence the factor 5.
# Keras documents `steps_per_epoch` / `validation_steps` as integers, so the
# fractional batch counts are rounded up explicitly rather than passed as floats.
steps_per_epoch = int(np.ceil(n_train_images / (5 * batch_size)))
validation_steps = int(np.ceil(n_val_images / (5 * batch_size)))

# Train the new dense head; the checkpoint keeps the best model, the stopper
# ends training early, and the history callback records each epoch.
benchmark_all_dense_layers.fit_generator(
    generator_train(batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=[checkpointer, stopper, get_history(history_all_dense_layers)],
    validation_data=generator_val(batch_size),
    validation_steps=validation_steps,
    verbose=2)

Found 12990 images belonging to 22 classes.
Found 4319 images belonging to 22 classes.
Epoch 1/100
Epoch 00000: val_loss improved from inf to 2.09595, saving model to ./saved_models/benchmark/weights_vgg16_dense.hdf5
36s - loss: 5.3808 - top_1_error: 0.7688 - top_3_error: 0.5538 - val_loss: 2.0960 - val_top_1_error: 0.6366 - val_top_3_error: 0.3783
Epoch 2/100
Epoch 00001: val_loss improved from 2.09595 to 1.87746, saving model to ./saved_models/benchmark/weights_vgg16_dense.hdf5
35s - loss: 2.1603 - top_1_error: 0.6515 - top_3_error: 0.3731 - val_loss: 1.8775 - val_top_1_error: 0.6069 - val_top_3_error: 0.3097
Epoch 3/100
Epoch 00002: val_loss improved from 1.87746 to 1.77648, saving model to ./saved_models/benchmark/weights_vgg16_dense.hdf5
35s - loss: 1.9803 - top_1_error: 0.5858 - top_3_error: 0.3285 - val_loss: 1.7765 - val_top_1_error: 0.5463 - val_top_3_error: 0.2731
Epoch 4/100
Epoch 00003: val_loss improved from 1.77648 to 1.67309, saving model to ./saved_models/benchmark/weig

<keras.callbacks.History at 0x7fa611ec6dd8>

In [28]:
# Restore the checkpoint written by ModelCheckpoint before scoring.
benchmark_all_dense_layers.load_weights('./saved_models/benchmark/weights_vgg16_dense.hdf5')
# `steps` must be a whole number of batches; rounding up makes the final,
# partially filled batch part of the evaluation so every validation image is
# scored.
evaluation_steps = int(np.ceil(n_val_images / batch_size))
performance_benchmark_all_dense_layers = benchmark_all_dense_layers.evaluate_generator(
    generator_val(batch_size, shuffle=False), steps=evaluation_steps)
# The result is indexed as [loss, top-1 error, top-3 error].
# NOTE: '{:2f}' means minimum width 2 with the default six decimals (not '.2f').
print('Validation loss: {:2f}'.format(performance_benchmark_all_dense_layers[0]))
print('Validation top-1-error rate: {:.2f}%'.format(100*performance_benchmark_all_dense_layers[1]))
print('Validation top-3-error rate: {:.2f}%'.format(100*performance_benchmark_all_dense_layers[2]))

Found 4319 images belonging to 22 classes.
Validation loss: 1.564269
Validation top-1-error rate: 46.45%
Validation top-3-error rate: 20.91%


Save results

In [29]:
# Persist the per-epoch history (one row per epoch) for later analysis.
history_all_dense_layers_frame = pd.DataFrame(history_all_dense_layers)
history_all_dense_layers_frame.to_csv('./results/benchmark/history_all_dense_layers.csv')

# Benchmark 3: VGG16 with replaced dense layers and trained convolutional block
Benchmark 2 is doing better on the validation data. Let's try to be even more flexible by allowing the last convolutional block to train as well. Here we are going to use the weights from Benchmark 2 to initialize. We just use the same model and make some more layers trainable. 

In [30]:
# This is an alias, not a copy: both names refer to the same model object, so
# Benchmark 3 starts from the Benchmark 2 weights loaded above and any further
# training also changes `benchmark_all_dense_layers`.
benchmark_dense_and_conv = benchmark_all_dense_layers

In [31]:
# Unfreeze everything from layer index 15 onwards: the last convolutional
# block (index 15 is `block5_conv1` in the layer listing) and the dense head.
first_trainable_index = 15
layers_to_unfreeze = benchmark_dense_and_conv.layers[first_trainable_index:]
for unfrozen_layer in layers_to_unfreeze:
    unfrozen_layer.trainable = True

In [32]:
# Sanity check: list each layer's name together with its `trainable` flag.
print('Which layers are trainable?')
for current_layer in benchmark_dense_and_conv.layers:
    status_line = '{}: {}'.format(current_layer.name, current_layer.trainable)
    print(status_line)

Which layers are trainable?
input_1: False
block1_conv1: False
block1_conv2: False
block1_pool: False
block2_conv1: False
block2_conv2: False
block2_pool: False
block3_conv1: False
block3_conv2: False
block3_conv3: False
block3_pool: False
block4_conv1: False
block4_conv2: False
block4_conv3: False
block4_pool: False
block5_conv1: True
block5_conv2: True
block5_conv3: True
block5_pool: True
global_average_pooling2d_1: True
fc1: True
dropout_1: True
fc2: True
dropout_2: True
prediction: True


For fine-tuning we switch from Adam to SGD with Nesterov momentum and a small learning rate (`10e-4`, i.e. 0.001).

In [33]:
from keras.optimizers import SGD
# SGD with Nesterov momentum for fine-tuning; the rate is 0.001
# (written as 10e-4 originally, which is the same value).
sgd = SGD(
    lr=1e-3,
    momentum=0.9,
    nesterov=True,
)

In [34]:
# Re-compile after changing the `trainable` flags, now with the SGD optimizer.
error_metrics = [top_1_error, top_3_error]
benchmark_dense_and_conv.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=error_metrics,
)

In [35]:
# Rebind `checkpointer` so Benchmark 3 saves its best model to its own file.
checkpointer = ModelCheckpoint(
    filepath='./saved_models/benchmark/weights_vgg16_conv_and_dense.hdf5',
    save_best_only=True,
    verbose=1,
)

In [36]:
# Per-epoch history for Benchmark 3, filled by the history callback.
history_dense_and_conv = []

In [37]:
# Batch size for Benchmark 3 (smaller again than Benchmark 2's 25 — presumably
# because more layers are trainable; confirm).
batch_size=20

In [38]:
# Each "epoch" covers roughly one fifth of the images, hence the factor 5.
# Keras documents `steps_per_epoch` / `validation_steps` as integers, so the
# fractional batch counts are rounded up explicitly rather than passed as floats.
steps_per_epoch = int(np.ceil(n_train_images / (5 * batch_size)))
validation_steps = int(np.ceil(n_val_images / (5 * batch_size)))

# Fine-tune the last convolutional block together with the dense head; the
# checkpoint keeps the best model, the stopper ends training early, and the
# history callback records each epoch.
benchmark_dense_and_conv.fit_generator(
    generator_train(batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=[checkpointer, stopper, get_history(history_dense_and_conv)],
    validation_data=generator_val(batch_size),
    validation_steps=validation_steps,
    verbose=2)

Found 12990 images belonging to 22 classes.
Found 4319 images belonging to 22 classes.
Epoch 1/100
43s - loss: 1.8375 - top_1_error: 0.5635 - top_3_error: 0.2719 - val_loss: 1.8243 - val_top_1_error: 0.5580 - val_top_3_error: 0.2920
Epoch 2/100
41s - loss: 1.7299 - top_1_error: 0.5250 - top_3_error: 0.2612 - val_loss: 1.8512 - val_top_1_error: 0.5443 - val_top_3_error: 0.2625
Epoch 3/100
42s - loss: 1.6476 - top_1_error: 0.5158 - top_3_error: 0.2392 - val_loss: 1.6658 - val_top_1_error: 0.5011 - val_top_3_error: 0.2284
Epoch 4/100
42s - loss: 1.5973 - top_1_error: 0.5031 - top_3_error: 0.2250 - val_loss: 1.6103 - val_top_1_error: 0.4909 - val_top_3_error: 0.1977
Epoch 5/100
42s - loss: 1.5871 - top_1_error: 0.4931 - top_3_error: 0.2212 - val_loss: 1.6102 - val_top_1_error: 0.4773 - val_top_3_error: 0.2193
Epoch 6/100
41s - loss: 1.3977 - top_1_error: 0.4365 - top_3_error: 0.1762 - val_loss: 1.6369 - val_top_1_error: 0.4943 - val_top_3_error: 0.2273
Epoch 7/100
42s - loss: 1.3474 - top_

<keras.callbacks.History at 0x7fa613adedd8>

In [39]:
# Restore the checkpoint written by ModelCheckpoint before scoring.
benchmark_dense_and_conv.load_weights('./saved_models/benchmark/weights_vgg16_conv_and_dense.hdf5')
# `steps` must be a whole number of batches; rounding up makes the final,
# partially filled batch part of the evaluation so every validation image is
# scored.
evaluation_steps = int(np.ceil(n_val_images / batch_size))
performance_benchmark_conv_and_dense_layers = benchmark_dense_and_conv.evaluate_generator(
    generator_val(batch_size, shuffle=False), steps=evaluation_steps)
# The result is indexed as [loss, top-1 error, top-3 error].
# NOTE: '{:2f}' means minimum width 2 with the default six decimals (not '.2f').
print('Validation loss: {:2f}'.format(performance_benchmark_conv_and_dense_layers[0]))
print('Validation top-1-error rate: {:.2f}%'.format(100*performance_benchmark_conv_and_dense_layers[1]))
print('Validation top-3-error rate: {:.2f}%'.format(100*performance_benchmark_conv_and_dense_layers[2]))

Found 4319 images belonging to 22 classes.
Validation loss: 1.483656
Validation top-1-error rate: 42.12%
Validation top-3-error rate: 18.41%


Doing better than the previous models :).

Save results

In [40]:
# Persist the per-epoch history (one row per epoch) for later analysis.
history_dense_and_conv_frame = pd.DataFrame(history_dense_and_conv)
history_dense_and_conv_frame.to_csv('./results/benchmark/history_dense_and_conv.csv')

In [1]:
from keras.models import load_model
# Reload the saved Benchmark 3 model from disk and print its architecture.
# `compile=False` loads it without compiling, so the custom error metrics do
# not need to be supplied.
saved_conv_and_dense_model = load_model(
    './saved_models/benchmark/weights_vgg16_conv_and_dense.hdf5',
    compile=False,
)
saved_conv_and_dense_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________