Now that we have an initial finetuned model, we can try other things to improve accuracy.

In [1]:
from theano.sandbox import cuda

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

Using Theano backend.


In [3]:
#path = "data/dogscats/sample/"
path = "data/nails/"
model_path = path + 'models/'
if not os.path.exists(model_path): os.mkdir(model_path)

batch_size=4

Let's try removing dropout since we don't have too much data to work with.

In [4]:
# Every classifier head built later in this notebook ends in Dense(5), and the
# weights of this model's dense layers are copied into those heads, so the
# finetuned VGG must be created with 5 outputs rather than 2.
# NOTE(review): confirm that finetune3.h5 was saved from a 5-class model.
model = vgg_ft(5)

In [5]:
model.load_weights(model_path+'finetune3.h5')

In [6]:
layers = model.layers

In [7]:
# Collect the positions of all layers whose exact type is Convolution2D and
# keep the final one ([-1]); this marks where the conv stack ends.
last_conv_idx = [index for index,layer in enumerate(layers) 
                     if type(layer) is Convolution2D][-1]

In [8]:
last_conv_idx

30

In [9]:
layers[last_conv_idx]

<keras.layers.convolutional.Convolution2D at 0x7f1b576c1790>

In [10]:
# Split the network right after the last convolutional layer: everything up to
# and including it becomes the feature extractor (conv_model); the remaining
# layers are kept as fc_layers so their weights can be reused later.
conv_layers = layers[:last_conv_idx+1]
conv_model = Sequential(conv_layers)
fc_layers = layers[last_conv_idx+1:]

In [None]:
# shuffle=False keeps the generator order fixed, so the features predicted from
# these batches line up row-for-row with the `classes` arrays read below.
batches = get_batches(path+'train', shuffle=False, batch_size=batch_size)
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)

# Per-image class ids from the generators, converted with onehot() (from utils,
# presumably a one-hot encoding) into the targets passed to fit() further down.
val_classes = val_batches.classes
trn_classes = batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)

In [None]:
val_features = conv_model.predict_generator(val_batches, val_batches.nb_sample)

In [None]:
trn_features = conv_model.predict_generator(batches, batches.nb_sample)

In [None]:
save_array(model_path + 'train_convlayer_features.bc', trn_features)
save_array(model_path + 'valid_convlayer_features.bc', val_features)

In [None]:
trn_features = load_array(model_path+'train_convlayer_features.bc')
val_features = load_array(model_path+'valid_convlayer_features.bc')

Remove dropout and rescale the weights to compensate.

In [11]:
def proc_wgts(layer):
    """Return a list with every weight array of `layer` divided by two.

    The arrays come from `layer.get_weights()`; the layer itself is not
    modified. Used when copying weights into a head whose dropout is disabled.
    """
    halved = []
    for weights in layer.get_weights():
        halved.append(weights / 2)
    return halved

In [12]:
# Such a finely tuned model needs to be updated very slowly!
opt = RMSprop(lr=0.00001, rho=0.7)

In [13]:
def get_fc_model():
    """Build and compile a dense classifier head with dropout disabled.

    The head takes the output of the last convolutional layer as input and
    ends in a 5-way softmax. Its layers are initialised, position by position,
    from `fc_layers` with the weights passed through `proc_wgts`. The model is
    compiled with the notebook-level optimizer `opt`.
    """
    conv_output_shape = conv_layers[-1].output_shape[1:]
    # Dropout(0.) layers are kept so that this stack has the same layer
    # positions as fc_layers, which the zip() below relies on.
    fc_stack = [
        MaxPooling2D(input_shape=conv_output_shape),
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(0.),
        Dense(4096, activation='relu'),
        Dropout(0.),
        Dense(5, activation='softmax'),
    ]
    model = Sequential(fc_stack)

    for new_layer, trained_layer in zip(model.layers, fc_layers):
        new_layer.set_weights(proc_wgts(trained_layer))

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [14]:
fc_model = get_fc_model()

And fit the model in the usual way:

In [None]:
fc_model.fit(trn_features, trn_labels, nb_epoch=8, 
             batch_size=batch_size, validation_data=(val_features, val_labels))

In [None]:
fc_model.save_weights(model_path+'no_dropout.h5')

In [15]:
fc_model.load_weights(model_path+'no_dropout.h5')

We might want to try data augmentation:

In [None]:
gen = image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1, 
                               height_shift_range=0.1, zoom_range=0.1, horizontal_flip=True)

In [None]:
batches = get_batches(path+'train', gen, batch_size=batch_size)
val_batches = get_batches(path+'valid', shuffle=False, batch_size=batch_size)

In [16]:
fc_model = get_fc_model()

In [17]:
# Freeze the convolutional layers so that only the attached dense head is
# trained, then stack the dense model on top to form one end-to-end network.
# Note: this mutates conv_model, which from here on also contains fc_model.
for layer in conv_model.layers: layer.trainable = False
conv_model.add(fc_model)

In [18]:
conv_model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
conv_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=100,  
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
conv_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=3, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
conv_model.save_weights(model_path + 'aug1.h5')

In [19]:
conv_model.load_weights(model_path + 'aug1.h5')

Add batch normalization and try a little dropout with it.

In [20]:
def get_bn_layers(p):
    """Return a fresh list of layers for a batch-normalised dense head.

    The list is: max-pooling over the last conv layer's output, flatten, two
    (Dense(4096, relu) -> BatchNormalization -> Dropout(p)) groups, and a final
    Dense(1000, softmax).

    p: dropout rate given to both Dropout layers.
    """
    stack = [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
    ]
    # Two identical dense groups, created in the same order as a literal list would.
    for _ in range(2):
        stack.append(Dense(4096, activation='relu'))
        stack.append(BatchNormalization())
        stack.append(Dropout(p))
    stack.append(Dense(1000, activation='softmax'))
    return stack

In [21]:
def load_fc_weights_from_vgg16bn(model):
    """Copy the dense-layer weights of a Vgg16BN model into `model`.

    A Vgg16BN instance is created, its underlying model is split with
    `split_at(..., Convolution2D)` and the second part of that split is copied
    into `model.layers` via `copy_weights`.
    (`split_at` and `copy_weights` presumably come from utils; confirm there
    that the split happens at the last Convolution2D layer.)
    """
    # See imagenet_batchnorm.ipynb for info on how the weights for
    # Vgg16BN can be generated from the standard Vgg16 weights.
    from vgg16bn import Vgg16BN
    pretrained = Vgg16BN().model
    dense_part = split_at(pretrained, Convolution2D)[1]
    copy_weights(dense_part, model.layers)

In [22]:
p=0.2

In [23]:
bn_model = Sequential(get_bn_layers(p))

In [24]:
load_fc_weights_from_vgg16bn(bn_model)

In [25]:
def proc_wgts(layer, prev_p=0.5, new_p=0.0):
    """Return the weights of `layer` rescaled for a change in dropout rate.

    Every array from `layer.get_weights()` is multiplied by
    (1 - prev_p) / (1 - new_p); the layer itself is not modified.

    prev_p: dropout rate the weights were trained with.
    new_p:  dropout rate the weights will be used with.

    The defaults (0.5 -> 0.0) give a factor of 0.5, i.e. the plain halving done
    by the one-argument form of this function. Keeping that call working means
    get_fc_model() does not break once this definition has replaced the
    earlier one in the kernel.
    """
    scal = (1-prev_p)/(1-new_p)
    return [o*scal for o in layer.get_weights()]

In [26]:
# Rescale the Dense weights for the change in dropout rate. The source rate is
# kept at 0.5; the target rate must be the `p` that bn_model was actually built
# with, not a separate hard-coded literal.
for l in bn_model.layers:
    if type(l) is Dense: l.set_weights(proc_wgts(l, 0.5, p))

In [27]:
bn_model.pop()
for layer in bn_model.layers: layer.trainable=False

In [28]:
bn_model.add(Dense(5,activation='softmax'))

In [29]:
bn_model.compile(Adam(), 'categorical_crossentropy', metrics=['accuracy'])

In [None]:
bn_model.fit(trn_features, trn_labels, nb_epoch=50, validation_data=(val_features, val_labels))

In [None]:
bn_model.save_weights(model_path+'bn.h5')

In [30]:
bn_model.load_weights(model_path+'bn.h5')

In [31]:
# Build a second copy of the batch-normalised head with the same dropout rate
# `p` as bn_model (instead of repeating the literal), then swap its 1000-way
# output layer for the 5-way softmax used in this notebook.
bn_layers = get_bn_layers(p)
bn_layers.pop()
bn_layers.append(Dense(5,activation='softmax'))

In [32]:
# Assemble the end-to-end model: the convolutional layers (frozen, so they are
# not updated during training) followed by the batch-normalised dense head.
final_model = Sequential(conv_layers)
for layer in final_model.layers: layer.trainable = False
for layer in bn_layers: final_model.add(layer)

In [33]:
# Copy the weights of bn_model, layer by layer, into the head layers that were
# just added to final_model (both lists have the same layer order).
for l1,l2 in zip(bn_model.layers, bn_layers):
    l2.set_weights(l1.get_weights())

In [34]:
final_model.compile(optimizer=Adam(), 
                    loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
final_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=50, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
final_model.save_weights(model_path + 'final1.h5')

In [None]:
final_model.load_weights(model_path + 'final1.h5')

In [None]:
final_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=50, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
final_model.save_weights(model_path + 'final2.h5')

In [35]:
final_model.load_weights(model_path + 'final2.h5')

In [None]:
# Assigning a plain float to `optimizer.lr` only rebinds the Python attribute;
# once a fit call has built the training function, that function keeps using
# the original backend variable. Update that variable in place instead
# (Theano backend, as reported at the top of this notebook).
# NOTE(review): 0.001 is also the documented default for Adam - confirm the
# intended learning rate.
final_model.optimizer.lr.set_value(0.001)

In [None]:
final_model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=50, 
                        validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [None]:
# Evaluate on the test images as they are: no random augmentation, fixed order,
# and a sample count taken from the test generator itself rather than from the
# validation generator.
test_batches = get_batches(path+'test', shuffle=False, batch_size=batch_size*2)
final_model.evaluate_generator(test_batches, test_batches.nb_sample)

In [None]:
??model.evaluate_generator

In [None]:
# bn_model takes conv-layer feature maps as input (its first layer is built
# with conv_layers[-1].output_shape[1:]), not raw images. The test images are
# therefore run through the convolutional layers first, without augmentation
# and in a fixed order so the features line up with the generator's classes.
bn_test_batches = get_batches(path+'test', shuffle=False, batch_size=batch_size)
bn_test_features = Sequential(conv_layers).predict_generator(bn_test_batches, bn_test_batches.nb_sample)
bn_model.evaluate(bn_test_features, onehot(bn_test_batches.classes))

In [None]:
bn_model.summary()

In [None]:
bn_model.summary()

In [None]:
final_model.save(model_path + 'final.h5')

In [None]:
test2_data = get_data(path + 'test2/')
final_model.predict(test2_data)