In [1]:
import os
from convnets import *
from __future__ import division, print_function

%matplotlib inline

from keras.backend.tensorflow_backend import set_session

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
set_session(tf.Session(config=config))

Using TensorFlow backend.


# The Nature Conservancy: Fisheries Monitoring
The Conservancy has invited the Kaggle community to develop algorithms to automatically detect and classify species of tunas, sharks and more that fishing boats catch, which will accelerate the video review process.

We will apply transfer learning with the pretrained VGG16 model, replacing its top "classification" layers with:
- an MLP augmented with dropout and batch normalization
- a fully convolutional classifier

## Getting Bottleneck features

In [2]:
# Image directories for each data split, passed to get_batches further down.
train_path, valid_path, test_path = './train/', './valid/', './test/'

# Batch size shared by the bottleneck extraction and the classifier fits.
batch_size = 32

In [3]:
# Base network used to compute the bottleneck features.
model = get_VGG16()

# shuffle=False keeps the batches in a fixed order, which the fitting cells
# rely on when they pair the features with `<generator>.classes`.
train_bottleneck = get_batches(
    train_path,
    batch_size=batch_size,
    shuffle=False,
)
valid_bottleneck = get_batches(
    valid_path,
    batch_size=batch_size,
    shuffle=False,
)

Found 3022 images belonging to 8 classes.
Found 755 images belonging to 8 classes.


In [4]:
%%time
print('Getting bottleneck features for train')
bottleneck_features_train = model.predict_generator(train_bottleneck, train_bottleneck.samples // batch_size)
np.save(open('./initial_files/train_bottleneck.npy', 'wb'), bottleneck_features_train)

print('Getting bottleneck features for validation')
bottleneck_features_validation = model.predict_generator(valid_bottleneck, valid_bottleneck.samples // batch_size)
np.save(open('./initial_files/valid_bottleneck.npy', 'wb'), bottleneck_features_validation)

Getting bottleneck features for train
Getting bottleneck features for validation
CPU times: user 2min 11s, sys: 11.5 s, total: 2min 22s
Wall time: 1min 12s


## Fitting
#### 1 - MLP augmented with dropout and batch-normalization

In [5]:
# Build the MLP head that is trained on top of the bottleneck features.
# NOTE(review): `do` and `lr` are presumably the dropout rate and learning
# rate - confirm against top_model_vgg in convnets.
mlp_vgg = top_model_vgg(
    n_classes=train_bottleneck.num_class,
    dense_neurons=512,
    lr=0.001,
    do=0.5,
)
mlp_vgg.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
top_maxpooling (MaxPooling2D (None, 3, 3, 512)         0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 3, 3, 512)         2048      
_________________________________________________________________
dropout_1 (Dropout)          (None, 3, 3, 512)         0         
_________________________________________________________________
top_flatten (Flatten)        (None, 4608)              0         
_________________________________________________________________
top_relu_1 (Dense)           (None, 512)               2359808   
_________________________________________________________________
batch_normalization_2 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
__________

In [6]:
# One-hot labels aligned with the cached bottleneck features.
# The slice keeps labels and features the same length even when the features
# were extracted without the final partial batch.
# num_classes is passed explicitly: the generators are unshuffled, so a
# truncated slice is ordered by class and to_categorical could otherwise infer
# fewer columns than the model has outputs.
train_labels = np_utils.to_categorical(
    train_bottleneck.classes[:len(bottleneck_features_train)],
    num_classes=train_bottleneck.num_class)
valid_labels = np_utils.to_categorical(
    valid_bottleneck.classes[:len(bottleneck_features_validation)],
    num_classes=train_bottleneck.num_class)

# Train the MLP head on the precomputed features.
mlp_vgg.fit(bottleneck_features_train,
            train_labels,
            epochs=10,
            batch_size=batch_size,
            validation_data=(bottleneck_features_validation, valid_labels))

Train on 3008 samples, validate on 736 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f1544f60588>

#### 2 - Fully convolutional layers

In [7]:
# Build the fully convolutional head for the bottleneck features.
# NOTE(review): `n_fm` is presumably the number of feature maps per conv
# layer and `do` the dropout rate - confirm against heat_layers in convnets.
model_fc = heat_layers(
    n_classes=train_bottleneck.num_class,
    n_fm=256,
    do=0.5,
    lr=0.001,
)
model_fc.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_4 (Batch (None, 7, 7, 512)         2048      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 7, 7, 256)         1179904   
_________________________________________________________________
batch_normalization_5 (Batch (None, 7, 7, 256)         1024      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 256)         590080    
_________________________________________________________________
batch_normalization_6 (Batch (None, 7, 7, 256)         1024      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 256)         590080    
_________________________________________________________________
batch_normalization_7 (Batch (None, 7, 7, 256)         1024      
__________

In [8]:
# One-hot labels aligned with the cached bottleneck features.
# The slice keeps labels and features the same length even when the features
# were extracted without the final partial batch.
# num_classes is passed explicitly: the generators are unshuffled, so a
# truncated slice is ordered by class and to_categorical could otherwise infer
# fewer columns than the model has outputs.
train_labels = np_utils.to_categorical(
    train_bottleneck.classes[:len(bottleneck_features_train)],
    num_classes=train_bottleneck.num_class)
valid_labels = np_utils.to_categorical(
    valid_bottleneck.classes[:len(bottleneck_features_validation)],
    num_classes=train_bottleneck.num_class)

# Train the fully convolutional head on the precomputed features.
model_fc.fit(bottleneck_features_train,
             train_labels,
             epochs=10,
             batch_size=batch_size,
             validation_data=(bottleneck_features_validation, valid_labels))

Train on 3008 samples, validate on 736 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f153e036c18>

## Predict

In [9]:
# Unshuffled generator over the test images. A batch size of 7 divides the
# 13153 test images exactly, so the floor-divided step count used for
# prediction covers every image.
test = get_batches(
    test_path,
    batch_size=7,
    shuffle=False,
)

Found 13153 images belonging to 2 classes.


In [10]:
%%time
test_bottleneck_features = model.predict_generator(test, test.samples // 7, verbose=1)

CPU times: user 7min 56s, sys: 42.5 s, total: 8min 38s
Wall time: 4min 53s


In [11]:
# Run the trained MLP head on the test bottleneck features.
preds = mlp_vgg.predict(
    test_bottleneck_features,
    batch_size=7,
    verbose=1,
)



In [12]:
# Map each test image's file name to {class_index: predicted value}.
# Predictions are matched to files by position, so the two sequences must
# have the same length; otherwise images would be missing or mislabelled.
assert len(preds) == len(test.filenames), \
    'number of predictions does not match number of test files'

# os.path.basename strips the directory part whatever the separator or
# nesting depth; the previous split('/')[1] only worked for exactly one
# '/'-separated sub-directory level.
preds_dict = {
    os.path.basename(img): dict(enumerate(preds[idx]))
    for idx, img in enumerate(test.filenames)
}

In [13]:
# Build the submission table: one row per image, one column per class.
# Columns are named through the generator's own class -> index mapping rather
# than by assuming alphabetical order and column position line up.
index_to_class = {idx: cls for cls, idx in train_bottleneck.class_indices.items()}

submission = pd.DataFrame.from_dict(preds_dict, orient='index')
submission = submission.rename(columns=index_to_class)
# Order the class columns by class index.
submission = submission[[index_to_class[i] for i in sorted(index_to_class)]]

# Turn the file-name index into the leading 'image' column.
submission.index.name = 'image'
submission = submission.reset_index()

submission.to_csv('./submission.csv', index=False)