In [57]:
# Notebook setup: enable the autoreload extension in mode 2 so edited project
# modules are picked up without restarting the kernel, and render matplotlib
# figures inline.
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [90]:
from keras.applications.vgg16 import VGG16
from keras.models import Sequential
from keras.layers.core import Dense, Flatten, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from sklearn.metrics import fbeta_score
import numpy as np

In [59]:
import os
import sys
# Put the parent of the current working directory on the import path so the
# project-local `ama` package can be imported from this notebook.
base_module_path = os.path.abspath(os.pardir)
if base_module_path not in sys.path:
    sys.path.append(base_module_path)

import ama as a

# Short alias for the project's batch generator class.
TrainBatch = a.trainbatch.TrainBatch

In [60]:
# Load the complete VGG16 network (convolutional base plus its dense top)
# initialised with the ImageNet weights.
vgg = VGG16(weights='imagenet', include_top=True)

In [61]:
# Find the position of the final convolutional layer in VGG16 and split the
# layer list there: everything up to and including it forms the feature
# extractor, everything after it is the original classifier head.
last_conv_idx = [
    idx
    for idx, layer in enumerate(vgg.layers)
    if type(layer) is Convolution2D
][-1]
conv_layers, fc_layers = (
    vgg.layers[:last_conv_idx + 1],
    vgg.layers[last_conv_idx + 1:],
)

In [62]:
# Wrap the convolutional layers in their own model so image features can be
# computed once and reused for every training run of the dense head.
conv_model = Sequential(conv_layers)

In [63]:
def get_fc_model(n_classes=17, hidden_units=4096, dropout=0.5, input_shape=None):
    """Build the fully connected head that is trained on VGG16 conv features.

    The head max-pools the incoming feature maps, flattens them, applies two
    ReLU dense layers (each followed by dropout) and ends in a sigmoid layer
    with one independent output per label.

    Args:
        n_classes: Number of sigmoid output units (defaults to 17).
        hidden_units: Width of each of the two hidden dense layers.
        dropout: Dropout rate applied after each hidden dense layer.
        input_shape: Shape of a single input feature map, without the batch
            axis. When omitted it is taken from the output shape of the last
            layer in the notebook-level ``conv_layers`` list.

    Returns:
        A new, uncompiled ``Sequential`` model.
    """
    if input_shape is None:
        # Default: accept exactly what the convolutional model produces.
        input_shape = conv_layers[-1].output_shape[1:]
    return Sequential([
        MaxPooling2D(input_shape=input_shape),
        Flatten(),
        Dense(hidden_units, activation='relu'),
        Dropout(dropout),
        Dense(hidden_units, activation='relu'),
        Dropout(dropout),
        Dense(n_classes, activation='sigmoid'),
    ])

In [64]:
# Build the dense head. NOTE(review): the training cell below calls
# get_fc_model() again before fitting, so this instance is never trained.
fc_model = get_fc_model()

In [65]:
# Data location and batching settings shared by both generators.
path = '../data/'
batch_size = 128
img_size = (224, 224)

# Training and validation generators. Both read labels from the same CSV;
# presumably TrainBatch keeps only the rows for images found in its
# directory -- TODO confirm.
traingen = TrainBatch(
    path + 'train-jpg/',
    path + 'train_v2.csv',
    batch_size=batch_size,
    img_size=img_size,
)
valgen = TrainBatch(
    path + 'val-jpg/',
    path + 'train_v2.csv',
    batch_size=batch_size,
    img_size=img_size,
)

# Label matrices exposed by the generators, used as training targets below.
train_labels, val_labels = traingen.labels, valgen.labels

Found 34479 images belonging to 1 classes.
Found 6000 images belonging to 1 classes.


In [66]:
# Push every training and validation image through the convolutional model
# once and keep the resulting feature maps, so the dense head can be trained
# on them directly instead of re-running VGG16 each epoch.
# NOTE(review): this assumes the generators yield images in the same order as
# their `.labels` attribute (i.e. no shuffling) -- TODO confirm in TrainBatch.
train_features = conv_model.predict_generator(traingen, traingen.nb_sample)
val_features = conv_model.predict_generator(valgen, valgen.nb_sample)

In [67]:
# Build a new dense head, compile it with Adam at a learning rate of 0.0001
# and train it for a single epoch on the precomputed features.
fc_model = get_fc_model()
fc_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(lr=0.0001),
    metrics=['accuracy'],
)
fc_model.fit(
    train_features,
    train_labels,
    validation_data=(val_features, val_labels),
    batch_size=batch_size,
    nb_epoch=1,
)

Train on 34479 samples, validate on 6000 samples
Epoch 1/1


<keras.callbacks.History at 0x7fe52d7144d0>

In [68]:
# Keep training the same model for five more epochs.
fc_model.fit(
    train_features,
    train_labels,
    validation_data=(val_features, val_labels),
    batch_size=batch_size,
    nb_epoch=5,
)

Train on 34479 samples, validate on 6000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fe52d714c90>

In [69]:
# Keep training the same model for ten more epochs.
fc_model.fit(
    train_features,
    train_labels,
    validation_data=(val_features, val_labels),
    batch_size=batch_size,
    nb_epoch=10,
)

Train on 34479 samples, validate on 6000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fe52d714650>

In [70]:
# Recompile with a learning rate of 0.00001 (ten times smaller than the
# 0.0001 used for the first run) and train for five more epochs.
fc_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(lr=0.00001),
    metrics=['accuracy'],
)
fc_model.fit(
    train_features,
    train_labels,
    validation_data=(val_features, val_labels),
    batch_size=batch_size,
    nb_epoch=5,
)

Train on 34479 samples, validate on 6000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fe54738d510>

In [71]:
# Checkpoint the dense head's weights after the low-learning-rate training
# above, before experimenting further.
fc_model.save_weights('../data/weights/fc_vgg_1.hk')

In [72]:
# Experiment: recompile with a larger learning rate of 0.001 and train for one
# epoch. The weights saved just before this cell are reloaded further down.
fc_model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(lr=0.001),
    metrics=['accuracy'],
)
fc_model.fit(
    train_features,
    train_labels,
    validation_data=(val_features, val_labels),
    batch_size=batch_size,
    nb_epoch=1,
)

Train on 34479 samples, validate on 6000 samples
Epoch 1/1


<keras.callbacks.History at 0x7fe52d7c3990>

In [73]:
# Continue the higher-learning-rate experiment for five more epochs.
fc_model.fit(
    train_features,
    train_labels,
    validation_data=(val_features, val_labels),
    batch_size=batch_size,
    nb_epoch=5,
)

Train on 34479 samples, validate on 6000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fe52d7c31d0>

In [81]:
# Restore the checkpoint written earlier, discarding the weight changes made
# by the lr=0.001 training cells that ran after it was saved.
fc_model.load_weights('../data/weights/fc_vgg_1.hk')

In [86]:
# Score the precomputed validation features with the dense head; each row
# holds one sigmoid output per label.
val_predictions = fc_model.predict(val_features, batch_size=batch_size)

In [87]:
# Inspect the predicted label probabilities for one validation image.
# (The array produced by the predict cell is named `val_predictions`; the bare
# name `predictions` is not defined anywhere in this notebook.)
val_predictions[10]

array([  1.11175666e-03,   1.56974849e-07,   2.67184691e-06,
         7.67926213e-06,   3.08051625e-07,   7.11987843e-04,
         5.44774055e-04,   4.10613836e-07,   2.18340909e-04,
         6.16230091e-06,   1.65440888e-05,   9.99888897e-01,
         9.99589741e-01,   8.16660700e-04,   1.08350905e-05,
         2.44193984e-07,   5.00072201e-04], dtype=float32)

In [114]:
# Convert the validation labels to a uint8 array for use with scikit-learn's
# metrics, then display it.
val_labs2 = np.asarray(val_labels).astype(np.uint8)
val_labs2

array([[1, 0, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [124]:
# Show the label vector of the first validation image, for comparison with
# the thresholded prediction for the same image.
val_labs2[0]

array([1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0], dtype=uint8)

In [122]:
# Binarise the sigmoid outputs: a label counts as predicted when its score is
# above 0.2. Display the first row as a quick sanity check.
val_preds2 = (val_predictions > 0.2).astype(np.uint8)
val_preds2[0]

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=uint8)

In [92]:
# Interactive introspection only: shows the source of sklearn's fbeta_score.
# NOTE(review): leftover exploration aid; not needed for the analysis itself.
??fbeta_score

In [120]:
# Per-sample F2 score of the thresholded predictions against the true labels.
# NOTE(review): the ValueError recorded below comes from an out-of-order run
# (this cell executed as In [120], before the In [122] and In [124] cells shown
# above). The message suggests one of the inputs was not a binary 0/1
# indicator matrix at that time -- re-run top to bottom and confirm that both
# val_labs2 and val_preds2 contain only 0 and 1.
fbeta_score(val_labs2, val_preds2, beta=2, average='samples')

ValueError: Can't handle mix of multiclass-multioutput and multilabel-indicator