In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from keras.utils import np_utils
from keras.models import Sequential
from keras.callbacks import EarlyStopping, History, ModelCheckpoint
from keras.layers.core import Flatten, Dense, Dropout, Reshape, Lambda
from keras.layers.normalization import BatchNormalization

from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

In [None]:
# Load the saved feature arrays for the train, validation and test splits.
train_features, valid_features, test_features = (
    np.load('train_features.npy'),
    np.load('valid_features.npy'),
    np.load("test_features.npy"),
)

In [17]:
# Root folders of the training and validation images (one sub-folder per class).
train_dir, valid_dir = "nw_train/", "nw_valid/"

In [18]:
# Class names are the sub-folder names of train_dir.
# os.listdir returns entries in arbitrary order, so sort explicitly: the
# column order of the one-hot labels (LabelEncoder sorts its classes) and of
# the submission DataFrame (columns=classes) must agree.
# Non-directory entries (e.g. stray hidden files) are skipped because every
# class is later listed as a folder.
classes = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

In [19]:
# Display the class names discovered under train_dir.
classes

['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']

In [29]:
# Training labels: repeat each class name once per file found in its
# folder under train_dir, visiting the classes in the order of `classes`.
train_labels = [
    species
    for species in classes
    for _ in os.listdir(train_dir + species + '/')
]

In [None]:
# Validation labels: repeat each class name once per file found in its
# folder under valid_dir, visiting the classes in the order of `classes`.
validation_labels = [
    species
    for species in classes
    for _ in os.listdir(valid_dir + species + '/')
]

In [None]:
# One-hot encode the string labels for use with categorical_crossentropy.
#
# A single LabelEncoder is fitted on the full class list and shared by both
# splits, so a given species maps to the same column in the training and the
# validation targets even if one split contains no images of that species.
# (Fitting a separate encoder per split would silently shift the indices.)
label_encoder = LabelEncoder().fit(classes)
n_label_classes = len(label_encoder.classes_)

# to_categorical converts a class vector (integers) to a binary class matrix.
# The class count is passed explicitly (positionally) so every matrix has one
# column per class regardless of which labels occur in that split.
encoded_train = np_utils.to_categorical(
    label_encoder.transform(train_labels), n_label_classes)

encoded_valid = np_utils.to_categorical(
    label_encoder.transform(validation_labels), n_label_classes)

### Three steps to create a CNN
**1. Convolution**

**2. Activation**

**3. Pooling**

**Repeat steps 1, 2 and 3 to add more hidden layers.**

**4. After that, add a fully connected network.**

**This fully connected network gives the CNN the ability**
**to classify the samples.**

##### BatchNormalization normalizes the matrix after it has been through a convolution layer so that the scale of each dimension remains the same.

#### Dense layers are Keras's name for fully connected layers. These layers give the network the ability to classify the features learned by the CNN.

##### Softmax activation converts the outputs of the final layer into a probability distribution over the classes.

In [31]:
number_of_classes = len(classes)

# Fully connected classifier on top of the loaded feature arrays:
# flatten -> two Dense/BatchNormalization/Dropout stages -> softmax.
model = Sequential([
    # Input: flatten each sample's feature array into a 1-D vector
    Flatten(input_shape=train_features.shape[1:]),

    # First hidden stage
    Dense(4096, activation='relu'),
    BatchNormalization(),  # applied to the output of the fully connected layer
    Dropout(0.5),          # regularisation against overfitting

    # Second hidden stage
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),

    # Output: one softmax probability per class
    Dense(number_of_classes, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

NameError: name 'train_features' is not defined

In [None]:
# File that receives the checkpointed model
model_file = "./batch_normalised_weights.h5"

# EarlyStopping - stop training when the monitored quantity (val_loss)
# has stopped improving.
callbacks = EarlyStopping(
    monitor='val_loss',
    patience=3,
    verbose=1,
    mode='auto',
)

# ModelCheckpoint - autosave the best model: with save_best_only=True the
# file is only rewritten when the monitored quantity (val_acc) improves.
final_model = ModelCheckpoint(
    filepath=model_file,
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
)

In [None]:
# Train the model, evaluating on the validation split after each epoch.
# Both callbacks are attached: early stopping and best-model checkpointing.
history = model.fit(
    train_features,
    encoded_train,
    batch_size=10,
    nb_epoch=10,
    shuffle=True,
    validation_data=(valid_features, encoded_valid),
    callbacks=[callbacks, final_model],
)

In [None]:
# Training curves, side by side: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 6))

# (axes, train-history key, validation-history key, title, y-axis label)
panels = (
    (acc_ax, 'acc', 'val_acc', 'Model Accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'val_loss', 'Model Loss', 'Loss'),
)

for ax, train_key, valid_key, title, ylabel in panels:
    ax.plot(history.history[train_key])
    ax.plot(history.history[valid_key])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('epoch')
    ax.legend(['train', 'valid'], loc='upper left')

plt.show()

In [None]:
# Class probabilities for the test features.
# Sequential.predict returns the softmax outputs directly; predict_proba was
# only a legacy alias for it and is not available in later Keras releases.
predictions = model.predict(test_features, verbose=1)

In [None]:
# Build the submission table: one probability column per class, preceded by
# an 'image' column holding the test file names.
submission1 = pd.DataFrame(predictions, columns=classes)

# NOTE(review): os.listdir returns names in arbitrary order; the order here
# must match the order in which test_features.npy was generated - confirm.
test_files = os.listdir("test_stg1/test_stg1/")

# Fixed: the frame created above is `submission1`; the original referred to
# an undefined `submission`, which raises NameError.
submission1.insert(0, 'image', test_files)
submission1.head()

In [None]:
# Clip the predicted probabilities into [min_prob, MAX_PROB]
# (presumably to bound the log-loss penalty of confident mistakes - confirm).
MAX_PROB = 0.82

# Lower bound = the probability mass left over by MAX_PROB, shared equally
# among the remaining classes. Derived from the class count instead of a
# hard-coded 7; for 8 classes this is exactly (1 - 0.82) / 7.
min_prob = (1 - MAX_PROB) / max(len(classes) - 1, 1)

clipped_predictions = np.clip(predictions, min_prob, MAX_PROB)

# Same layout as the unclipped submission: 'image' column first, then one
# probability column per class.
submission2 = pd.DataFrame(clipped_predictions, columns=classes)
submission2.insert(0, 'image', test_files)
submission2.head()

In [None]:
# Write the clipped submission to disk without the DataFrame index column.
output_csv = "batch_normalized.csv"
submission2.to_csv(output_csv, index=False)