In [1]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using Theano backend.


In [2]:
import os, sys
current_dir = os.getcwd()
LESSON_HOME_DIR = current_dir
DATA_HOME_DIR = current_dir+'/data/dogscats'

%cd $DATA_HOME_DIR

/home/ubuntu/fast-ai/data/dogscats


## Setup

In [3]:
%pwd

u'/home/ubuntu/fast-ai/data/dogscats'

In [4]:
path = "./"
model_path = path + 'models/'

batch_size=64

In [25]:
batches = get_batches(path+'train', shuffle=False, batch_size=batch_size)
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
Found 0 images belonging to 0 classes.


In [6]:
(val_classes, trn_classes, val_labels, trn_labels, 
    val_filenames, filenames, test_filenames) = get_classes(path)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
Found 0 images belonging to 1 classes.


In this notebook we're going to create an ensemble of models and use their average as our predictions. For each ensemble, we're going to follow our usual fine-tuning steps:

1) Create a model that retrains just the last layer
2) Add this to a model containing all VGG layers except the last layer
3) Fine-tune just the dense layers of this model (pre-computing the convolutional layers)
4) Add data augmentation, fine-tuning the dense layers without pre-computation.

So first, we need to create our VGG model and pre-compute the output of the conv layers:

In [7]:
from keras.applications import vgg16

model = vgg16.VGG16(weights = 'imagenet', include_top=True)
conv_layers,fc_layers = split_at(model, Convolution2D)

In [8]:
conv_model = Sequential(conv_layers)

In [14]:
val_features = conv_model.predict_generator(val_batches, val_batches.nb_sample)
trn_features = conv_model.predict_generator(batches, batches.nb_sample)

In [15]:
save_array(model_path + 'train_convlayer_features.bc', trn_features)
save_array(model_path + 'valid_convlayer_features.bc', val_features)

In the future we can just load these precomputed features:

In [9]:
trn_features = load_array(model_path+'train_convlayer_features.bc')
val_features = load_array(model_path+'valid_convlayer_features.bc')

Finally, we can precompute the output of all but the last dropout and dense layers, for creating the first stage of the model:

In [10]:
model.layers.pop()
model.layers.pop()

<keras.layers.core.Dense at 0x7f802e0b0950>

In [13]:
fc_model = Sequential()
fc_model.add(MaxPooling2D(input_shape=(512,14,14)))
fc_model.add(Flatten())
fc_model.add(Dense(4096, activation='relu'))

In [14]:
for l1,l2 in zip(fc_model.layers, model.layers[-3:]): 
    weights = l2.get_weights()
    l1.set_weights(weights)
fc_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
maxpooling2d_1 (MaxPooling2D)    (None, 512, 7, 7)     0           maxpooling2d_input_1[0][0]       
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 25088)         0           maxpooling2d_1[0][0]             
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 4096)          102764544   flatten_1[0][0]                  
Total params: 102,764,544
Trainable params: 102,764,544
Non-trainable params: 0
____________________________________________________________________________________________________


In [15]:
fc_model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy', 
                     metrics=['accuracy'])

In [40]:
ll_val_feat = fc_model.predict(val_features)
ll_feat = fc_model.predict(trn_features)

In [41]:
save_array(model_path + 'train_ll_feat.bc', ll_feat)
save_array(model_path + 'valid_ll_feat.bc', ll_val_feat)

In [16]:
ll_feat = load_array(model_path+ 'train_ll_feat.bc')
ll_val_feat = load_array(model_path + 'valid_ll_feat.bc')

In [None]:
test_batches = get_batches(path+'test',shuffle=False, batch_size=batch_size)
test_features = conv_model.predict_generator(test_batches, test_batches.n)
save_array(model_path + 'test_convlayer_features.bc', val_features)

In [None]:
test_feature = load_array(model_path+'train_convlayer_features.bc')

## Last layer

The functions automate creating a model that trains the last layer from scratch, and then adds those new layers on to the main model.

In [17]:
def get_ll_layers():
    return [ 
        BatchNormalization(input_shape=(4096,)),
        Dropout(0.5),
        Dense(2, activation='softmax') 
        ]

In [18]:
def train_last_layer(i):
    ll_layers = get_ll_layers()
    ll_model = Sequential(ll_layers)
    ll_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    ll_model.optimizer.lr=1e-5
    ll_model.fit(ll_feat, trn_labels, validation_data=(ll_val_feat, val_labels), nb_epoch=12)
    ll_model.optimizer.lr=1e-7
    ll_model.fit(ll_feat, trn_labels, validation_data=(ll_val_feat, val_labels), nb_epoch=1)
    ll_model.save_weights(model_path+'ll_bn' + i + '.h5')

    vgg = vgg16.VGG16(weights='imagenet', include_top=True)
    vgg.layers.pop(); vgg.layers.pop(); 
    #vgg.layers.pop()
    model = Sequential(vgg.layers)
    
    for layer in model.layers: layer.trainable=False
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    ll_layers = get_ll_layers()
    for layer in ll_layers: model.add(layer)
    for l1,l2 in zip(ll_model.layers, model.layers[-3:]):
        l2.set_weights(l1.get_weights())
    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    model.save_weights(model_path+'bn' + i + '.h5')
    return model

## Dense model

In [20]:
def get_conv_model(model):
    layers = model.layers
    last_conv_idx = [index for index,layer in enumerate(layers) 
                         if type(layer) is Convolution2D][-1]

    conv_layers = layers[:last_conv_idx+1]
    conv_model = Sequential(conv_layers)
    fc_layers = layers[last_conv_idx+1:]
    return conv_model, fc_layers, last_conv_idx

In [21]:
def get_fc_layers(p, in_shape):
    return [
        MaxPooling2D(input_shape=in_shape),
        Flatten(),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(2, activation='softmax')
        ]

In [22]:
def train_dense_layers(i, model):
    conv_model, fc_layers, last_conv_idx = get_conv_model(model)
    conv_shape = conv_model.output_shape[1:]
    fc_model = Sequential(get_fc_layers(0.5, conv_shape))
    for l1,l2 in zip(fc_model.layers[-3:], fc_layers[-3:]): 
        weights = l2.get_weights()
        l1.set_weights(weights)
    fc_model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy', 
                     metrics=['accuracy'])
    fc_model.fit(trn_features, trn_labels, nb_epoch=4, 
         batch_size=batch_size, validation_data=(val_features, val_labels))

#     gen = image.ImageDataGenerator(rotation_range=10, width_shift_range=0.05, 
#        width_zoom_range=0.05, zoom_range=0.05,
#        channel_shift_range=10, height_shift_range=0.05, shear_range=0.05, horizontal_flip=True)
#     batches = gen.flow(trn, trn_labels, batch_size=batch_size)
#     val_batches = image.ImageDataGenerator().flow(val, val_labels, batch_size=batch_size)

    for layer in conv_model.layers: layer.trainable = False
    for layer in get_fc_layers(0.5, conv_shape): conv_model.add(layer)
    for l1,l2 in zip(conv_model.layers[last_conv_idx+1:], fc_model.layers): 
        l1.set_weights(l2.get_weights())

    conv_model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy', 
                       metrics=['accuracy'])
    conv_model.save_weights(model_path+'no_dropout_bn' + i + '.h5')
    return conv_model 
#     conv_model.fit_generator(batches, samples_per_epoch=batches.n, nb_epoch=1, 
#                             validation_data=val_batches, nb_val_samples=val_batches.n)
#     
#     for layer in conv_model.layers[16:]: layer.trainable = True
#     conv_model.fit_generator(batches, samples_per_epoch=batches.n, nb_epoch=1, 
#                             validation_data=val_batches, nb_val_samples=val_batches.n)

#     conv_model.optimizer.lr = 1e-7
#     conv_model.fit_generator(batches, samples_per_epoch=batches.n, nb_epoch=1, 
#                             validation_data=val_batches, nb_val_samples=val_batches.n)
#     conv_model.save_weights(model_path + 'aug' + i + '.h5')
#     

## Build ensemble

In [26]:
ens_pred = []
for i in range(5):
    i = str(i)
    model = train_last_layer(i)
    ens_model = train_dense_layers(i, model)
    pred = ens_model.predict_generator(path+'valid',shuffle=False, batch_size=batch_size)
    ens_pred.append(pred)
    
val_avg_preds = np.stack(ens_pred).mean(axis=0)
categorical_accuracy(val_labels, val_avg_preds).eval()

Train on 23000 samples, validate on 2000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Train on 23000 samples, validate on 2000 samples
Epoch 1/1
Train on 23000 samples, validate on 2000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Train on 23000 samples, validate on 2000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Train on 23000 samples, validate on 2000 samples
Epoch 1/1
Train on 23000 samples, validate on 2000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Train on 23000 samples, validate on 2000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Train on 23000 samples, validate on 2000 samples
Epoch 1/1
Train on 23000 samples, validate on 2000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Trai

Epoch 11/12
Epoch 12/12
Train on 23000 samples, validate on 2000 samples
Epoch 1/1
Train on 23000 samples, validate on 2000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Train on 23000 samples, validate on 2000 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12
Train on 23000 samples, validate on 2000 samples
Epoch 1/1
Train on 23000 samples, validate on 2000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


## Combine ensemble and test

In [105]:
#get model then set weights
vgg = vgg16.VGG16(weights='imagenet', include_top=True)
ens_model = Sequential(vgg.layers[:-5])
for layer in get_fc_layers(0.5, ens_model.output_shape[1:]): ens_model.add(layer)

In [122]:
ens_model.compile(optimizer=Adam(1e-5), loss='categorical_crossentropy', metrics=['accuracy'])

In [128]:
def get_fc_pred(val_features):
    fc_model = Sequential()
    for layer in get_fc_layers(0.5, (512,14,14)):
        fc_model.add(layer)

    fc_model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    preds = []
    for i in range(5):
        ens_model.load_weights('models/no_dropout_bn' + str(i) +'.h5')
        for l2,l1 in zip(ens_model.layers[-9:], fc_model.layers): 
            l1.set_weights(l2.get_weights())
        pred = fc_model.predict(val_features)
        preds.append(pred)
    return preds

In [129]:
val_avg_preds = np.stack(get_fc_pred).mean(axis=0)

In [130]:
categorical_accuracy(val_labels, val_avg_preds).eval()

array(0.9704999923706055, dtype=float32)

In [30]:
def get_ens_pred(arr, fname):
    ens_pred = []
    for i in range(5):
        i = str(i)
        ens_model.load_weights('{}{}{}.h5'.format(model_path, fname, i))
        preds = ens_model.predict_generator(arr,arr.n)
        ens_pred.append(preds)
    return ens_pred

In [31]:
val_pred = get_ens_pred(val_batches, 'no_dropout_bn')
test_pred = get_ens_pred(test_batches, 'no_dropout_bn')

Exception in thread Thread-9:
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 801, in __bootstrap_inner
    self.run()
  File "/usr/lib/python2.7/threading.py", line 754, in run
    self.__target(*self.__args, **self.__kwargs)
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.2-py2.7.egg/keras/engine/training.py", line 433, in data_generator_task
    generator_output = next(self._generator)
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.2-py2.7.egg/keras/preprocessing/image.py", line 822, in next
    index_array, current_index, current_batch_size = next(self.index_generator)
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.2-py2.7.egg/keras/preprocessing/image.py", line 645, in _flow_index
    current_index = (self.batch_index * batch_size) % n
ZeroDivisionError: integer division or modulo by zero

Exception in thread Thread-10:
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 801, in __bootstra

In [32]:
val_avg_preds = np.stack(val_pred).mean(axis=0)
test_avg_preds = np.stack(test_pred).mean(axis=0)

In [34]:
categorical_accuracy(val_labels, val_avg_preds).eval()

array(0.8740000128746033, dtype=float32)