In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from keras.applications.vgg16 import VGG16
from keras.callbacks import EarlyStopping, History, ModelCheckpoint
from keras.layers.core import Flatten, Dense, Dropout, Reshape, Lambda
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils

from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [None]:
# Load the pre-computed train / validation / test arrays saved as .npy files.
train_features, valid_features, test_features = (
    np.load(npy_path)
    for npy_path in ('train_features.npy', 'valid_features.npy', 'test_features.npy')
)

In [17]:
# Directories holding one sub-folder per class for the train and validation splits.
train_dir, valid_dir = "nw_train/", "nw_valid/"

In [18]:
# Class names are the sub-directory names of the training folder.
# os.listdir returns entries in arbitrary order, while LabelEncoder assigns
# indices in sorted label order, so sort here to keep the two aligned
# (these names are later used as the prediction column headers).
# Non-directory entries are skipped because each class is later listed as a folder.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

In [19]:
# Display the class names found in the training directory.
classes

['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

In [29]:
# Build the training labels: each class name is repeated once per entry
# found in that class's sub-folder of train_dir, in the order of `classes`.
train_labels = []
for species in classes:
    n_images = len(os.listdir(train_dir + species + '/'))
    train_labels += [species] * n_images

In [None]:
# Build the validation labels: each class name is repeated once per entry
# found in that class's sub-folder of valid_dir, in the order of `classes`.
validation_labels = []
for species in classes:
    n_images = len(os.listdir(valid_dir + species + '/'))
    validation_labels += [species] * n_images

In [None]:
# to_categorical converts a class vector (integers) to a binary class matrix
# for use with categorical_crossentropy.
#
# A single LabelEncoder is fitted on the training labels and reused for the
# validation labels so both splits share one class -> index mapping. Fitting
# a separate encoder per split would shift the indices (and change the matrix
# width) whenever a class is missing from the validation set.
label_encoder = LabelEncoder().fit(train_labels)
n_classes = len(label_encoder.classes_)

# The class count is passed positionally so both matrices always have
# n_classes columns, even if the highest-index class is absent from a split.
encoded_train = np_utils.to_categorical(label_encoder.transform(train_labels), n_classes)

encoded_valid = np_utils.to_categorical(label_encoder.transform(validation_labels), n_classes)

In [None]:
# Custom_vgg_model_1
# Training the classifier alone: load the VGG16 convolutional base with
# ImageNet weights and without its fully connected top.
# `input_shape` expects a plain (height, width, channels) tuple, not an
# Input tensor (a tensor would go through `input_tensor` instead).
shape = (150, 150, 3)

model = VGG16(input_tensor=None, include_top=False,
              weights='imagenet', input_shape=shape)
model.summary()

In [None]:
print('Adding new layers...')
# Start from the output tensor of the VGG16 base and flatten it so it can
# feed the dense classifier head.
output = model.output
output = Flatten()(output)

# FULLY CONNECTED LAYERS: two Dense -> BatchNormalization -> Dropout stages.
output = Dense(4096, activation="relu")(output)
output = BatchNormalization()(output)
output = Dropout(0.5)(output)
output = Dense(512, activation="relu")(output)
output = BatchNormalization()(output)
output = Dropout(0.5)(output)

# LOGISTIC LAYER: softmax over the 8 classes.
output = Dense(8, activation='softmax')(output)

# Full network: VGG16 base input -> new classifier head.
# `Model` comes from keras.models (imported in the top import cell).
custom_vgg_model = Model(model.input, output)
custom_vgg_model.summary()

In [None]:
# Freeze the first 19 layers of the combined model so that only the layers
# after them are updated during training.
# NOTE(review): 19 is presumably the layer count of the headless VGG16 base
# (its layers come first in custom_vgg_model) - confirm against model.summary().
for layer in custom_vgg_model.layers[:19]:
    layer.trainable = False

**ImageDataGenerator** generates batches of tensor image data with real-time data augmentation. The data is looped over (in batches) indefinitely.

In [None]:
# Random augmentations applied to training batches only.
augmentation_options = dict(
    horizontal_flip=True,
    height_shift_range=0.1,
    width_shift_range=0.1,
    rotation_range=10.,
    zoom_range=0.1,
    shear_range=0.1,
)
train_image_gen = ImageDataGenerator(**augmentation_options)

# Validation data goes through a generator with no augmentation configured.
val_image_gen = ImageDataGenerator()

In [None]:
# File that receives the checkpointed model.
model_file = "./data_augmented_weights.h5"

# EarlyStopping - stop training when the monitored quantity (val_loss) has
# stopped improving; patience is set to 3 epochs.
callbacks = EarlyStopping(mode='auto', verbose=1, patience=3, monitor='val_loss')

# ModelCheckpoint - autosave the best model: with save_best_only=True the file
# is only rewritten when the monitored val_acc improves.
final_model = ModelCheckpoint(model_file, monitor='val_acc',
                              save_best_only=True, verbose=1)

In [None]:
# The model must be compiled before fit_generator can be called.
# categorical_crossentropy matches the one-hot labels produced by
# to_categorical; the accuracy metric is what the checkpoint callback
# (val_acc) and the history plots read.
# NOTE(review): the original notebook shows no optimizer; 'adam' is a
# placeholder default - swap in the intended optimizer / learning rate.
custom_vgg_model.compile(optimizer='adam',
                         loss='categorical_crossentropy',
                         metrics=['accuracy'])

# Train on augmented batches and validate on un-augmented, unshuffled batches.
# NOTE(review): nb_epoch / samples_per_epoch / nb_val_samples are Keras 1
# argument names. Under Keras 2 they are mapped to epochs / steps_per_epoch /
# validation_steps, which count batches rather than samples - confirm the
# installed version treats 3026 and 750 as intended.
history = custom_vgg_model.fit_generator(
    train_image_gen.flow(train_features, encoded_train, batch_size=16),
    nb_epoch=4,
    samples_per_epoch=3026,
    validation_data=val_image_gen.flow(valid_features, encoded_valid,
                                       batch_size=16, shuffle=False),
    nb_val_samples=750,
    callbacks=[callbacks, final_model])

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 6))

# SUMMARIZE ACCURACY HISTORY
accuracy_ax.plot(history.history['acc'])
accuracy_ax.plot(history.history['val_acc'])
accuracy_ax.set_title('Model Accuracy')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.set_xlabel('epoch')
accuracy_ax.legend(['train', 'valid'], loc='upper left')

# SUMMARIZE LOSS HISTORY
loss_ax.plot(history.history['loss'])
loss_ax.plot(history.history['val_loss'])
loss_ax.set_title('Model Loss')
loss_ax.set_ylabel('Loss')
loss_ax.set_xlabel('epoch')
loss_ax.legend(['train', 'valid'], loc='upper left')

plt.show()

In [None]:
# Predict class probabilities with the full network (VGG16 base + softmax head).
# `model` is only the headless VGG16 base, and functional Models do not offer
# predict_proba; predict() on the softmax output already yields probabilities.
predictions = custom_vgg_model.predict(test_features, verbose=1)

In [None]:
# Raw (unclipped) submission: one probability column per class, preceded by
# the image file name.
submission1 = pd.DataFrame(predictions, columns=classes)
# NOTE(review): assumes os.listdir returns the test files in the same order
# in which test_features was built - confirm against the feature-extraction step.
test_files = os.listdir("test_stg1/test_stg1/")
submission1.insert(0, 'image', test_files)
submission1.head()

In [None]:
# Clip every probability into [(1 - 0.82) / 7, 0.82] so that no prediction is
# extremely close to 0 or 1.
upper_bound = 0.82
lower_bound = (1 - upper_bound) / 7
clipped_predictions = np.clip(predictions, lower_bound, upper_bound)

# Same layout as the raw submission: image name first, then one column per class.
submission2 = pd.DataFrame(clipped_predictions, columns=classes)
submission2.insert(0, 'image', test_files)
submission2.head()

In [None]:
# Write the clipped submission to disk without the DataFrame index column.
output_csv = "Augmented_and_Batch_Normalised.csv"
submission2.to_csv(output_csv, index=False)