In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.callbacks import EarlyStopping, History, ModelCheckpoint
from keras.layers.core import Flatten, Dense, Dropout, Reshape, Lambda
from keras.layers.normalization import BatchNormalization


Using TensorFlow backend.


In [3]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

In [4]:
import numpy as np

In [7]:
# Read the two saved .npy arrays in one step (features for the training and
# validation sets, going by the file names).
train_features, valid_features = map(
    np.load, ('train_features.npy', 'valid_features.npy')
)

In [8]:
# Root folders that hold one sub-directory per class; the trailing slash is
# relied on by the string concatenation used when listing each class folder.
train_dir, valid_dir = "new_train/", "new_valid/"

In [9]:
# Class names are the sub-directory names of the training folder.
# os.listdir returns entries in arbitrary order, and the labels built from this
# list must line up row-for-row with the saved feature arrays, so sort for a
# deterministic order. Stray non-directory entries (e.g. hidden files) are
# skipped because every class is later listed as a directory.
# NOTE(review): assumes the feature files were generated in sorted class order
# — confirm against the script that produced them.
classes = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

In [10]:
# Build the training labels: each class name is repeated once per entry found
# in that class's training sub-directory, in the order given by `classes`.
train_labels = []
for class_name in classes:
    n_entries = len(os.listdir(train_dir + class_name + '/'))
    train_labels += [class_name] * n_entries
    

In [11]:
# Validation labels, built the same way as the training labels: one copy of the
# class name per entry in that class's validation sub-directory.
valid_labels = [
    class_name
    for class_name in classes
    for _ in os.listdir(valid_dir + class_name + '/')
]

In [12]:
# Two steps: map class-name strings to integer ids, then expand the ids into
# one-hot rows suitable for the categorical cross-entropy loss.
train_label_ids = LabelEncoder().fit_transform(train_labels)
onehot_train = to_categorical(train_label_ids)

In [13]:
# Encode the validation labels with the mapping learned from the TRAINING
# labels. Fitting a separate encoder on the validation labels would silently
# give different integer ids (and a narrower one-hot matrix) whenever the
# validation set is missing a class.
# transform() raises ValueError if a validation label was never seen in
# training, which surfaces a data problem instead of hiding it.
valid_encoder = LabelEncoder().fit(train_labels)
# The class count is passed positionally because the keyword name differs
# between Keras releases (nb_classes vs. num_classes).
onehot_valid = to_categorical(
    valid_encoder.transform(valid_labels),
    len(valid_encoder.classes_),
)

In [14]:
# Classifier head on top of the saved features, declared as a layer list:
# flatten -> two (Dense, BatchNorm, Dropout) stages -> 8-way softmax.
model = Sequential([
    # Input shape is taken per sample from the training feature array.
    Flatten(input_shape=train_features.shape[1:]),
    Dense(4096, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    # One output unit per class; 8 is hard-coded here.
    Dense(8, activation='softmax'),
])

In [15]:

# Adam optimiser with categorical cross-entropy (targets are one-hot);
# accuracy is tracked as an additional metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Checkpointing: write the model to disk only when the monitored validation
# accuracy improves on the best value seen so far.
best_model_file = "./batch_normalized_weights.h5"
best_model = ModelCheckpoint(
    best_model_file,
    monitor='val_acc',
    save_best_only=True,
    verbose=1,
)

# Early stopping: halt training once validation loss has gone 4 epochs without
# improving. Despite the plural name, `callbacks` holds this single callback.
# NOTE: the two callbacks watch different quantities (val_loss vs. val_acc).
callbacks = EarlyStopping(monitor='val_loss', mode='auto', patience=4, verbose=1)

In [None]:
# Train for at most 10 epochs with mini-batches of 10 samples, evaluating on
# the validation arrays after each epoch; early stopping and checkpointing are
# both attached. The returned History object feeds the training-curve plots.
history = model.fit(
    train_features,
    onehot_train,
    batch_size=10,
    nb_epoch=10,
    validation_data=(valid_features, onehot_valid),
    shuffle=True,
    callbacks=[callbacks, best_model],
)

In [16]:
# Restore weights from the file the ModelCheckpoint callback is configured to
# write (same path as best_model_file, spelled without the leading "./").
# The path is given literally, so this cell does not depend on the callbacks
# cell having been run in the current kernel session.
model.load_weights("batch_normalized_weights.h5")

In [None]:
# Training curves side by side: accuracy on the left, loss on the right.
# Each panel overlays the training series and its 'val_'-prefixed counterpart
# from the History object returned by model.fit.
plt.figure(figsize=(15, 5))
for panel, (metric, label) in enumerate([('acc', 'accuracy'), ('loss', 'loss')], start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + label)
    plt.ylabel(label)
    plt.xlabel('epoch')
    plt.legend(['train', 'valid'], loc='upper left')
plt.show()

In [17]:
# Load the saved feature array for the test images.
# NOTE(review): presumably produced the same way as the train/valid feature
# files — confirm; the row order must match the file listing used later for
# the submission's 'image' column.
test_features = np.load("test_features.npy")

In [18]:
# Class probabilities for every test sample (one row per sample, one column
# per softmax output unit).
# predict() is used instead of predict_proba(): for a softmax-terminated model
# both return the same array, but predict_proba is a deprecated
# Sequential-only alias that newer Keras releases no longer provide.
test_preds = model.predict(test_features, verbose=1)



In [19]:
# Peek at the first five prediction rows as a sanity check.
test_preds[:5]

array([[  4.25173909e-07,   4.13326345e-10,   2.08612093e-07,
          1.02339762e-07,   9.99999285e-01,   7.75127407e-10,
          3.30187611e-09,   1.80800690e-08],
       [  7.65270829e-01,   5.39830595e-04,   3.51959281e-02,
          4.75643277e-02,   4.41440193e-07,   2.39610691e-02,
          1.26794487e-01,   6.73155417e-04],
       [  9.51398611e-01,   4.70445603e-02,   2.86779279e-04,
          9.45431704e-04,   1.29936117e-04,   1.60993513e-04,
          3.35412115e-05,   2.68866573e-09],
       [  1.42395124e-03,   2.69332260e-04,   5.78467507e-06,
          8.05944219e-05,   4.06069512e-06,   9.98111248e-01,
          9.44481726e-05,   1.06008356e-05],
       [  9.32031810e-01,   1.87786184e-02,   1.19043136e-04,
          2.66483694e-04,   1.17858285e-02,   1.90835465e-02,
          8.05480126e-03,   9.87994298e-03]], dtype=float32)

In [21]:
# Column names must follow the model's output order. The training targets were
# integer-encoded with LabelEncoder, which numbers classes in sorted order of
# the unique labels, so use exactly that ordering rather than os.listdir's
# arbitrary one.
class_columns = np.unique(train_labels)
submission1 = pd.DataFrame(test_preds, columns=class_columns)

# Sort the listing so the file-name column is deterministic across platforms.
# NOTE(review): assumes the rows of test_features.npy are in sorted file-name
# order — confirm against the script that produced that file.
test_files = sorted(os.listdir("test_stg1/test_stg1/"))
submission1.insert(0, 'image', test_files)
submission1.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,4.251739e-07,4.133263e-10,2.086121e-07,1.023398e-07,0.9999993,7.751274e-10,3.301876e-09,1.808007e-08
1,img_00007.jpg,0.7652708,0.0005398306,0.03519593,0.04756433,4.414402e-07,0.02396107,0.1267945,0.0006731554
2,img_00009.jpg,0.9513986,0.04704456,0.0002867793,0.0009454317,0.0001299361,0.0001609935,3.354121e-05,2.688666e-09
3,img_00018.jpg,0.001423951,0.0002693323,5.784675e-06,8.059442e-05,4.060695e-06,0.9981112,9.444817e-05,1.060084e-05
4,img_00027.jpg,0.9320318,0.01877862,0.0001190431,0.0002664837,0.01178583,0.01908355,0.008054801,0.009879943


In [27]:
# Clip every probability into [clip_min, clip_max]. The floor spreads the mass
# left over by the ceiling evenly across the remaining classes:
# (1 - 0.82) / 7 for the 8 output columns.
# NOTE(review): presumably done to cap the log-loss penalty of confident
# mistakes — confirm. Rows are not renormalised after clipping.
clip_max = 0.82
clip_min = (1 - clip_max) / (test_preds.shape[1] - 1)
clipped_preds = np.clip(test_preds, clip_min, clip_max)

# Use the same sorted class order that LabelEncoder assigned to the model's
# outputs, instead of listing an unrelated directory in arbitrary order.
submission2 = pd.DataFrame(clipped_preds, columns=np.unique(train_labels))
submission2.insert(0, 'image', test_files)
submission2.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,0.025714,0.025714,0.025714,0.025714,0.82,0.025714,0.025714,0.025714
1,img_00007.jpg,0.765271,0.025714,0.035196,0.047564,0.025714,0.025714,0.126794,0.025714
2,img_00009.jpg,0.82,0.047045,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714
3,img_00018.jpg,0.025714,0.025714,0.025714,0.025714,0.025714,0.82,0.025714,0.025714
4,img_00027.jpg,0.82,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714


In [28]:
# Write the clipped predictions to CSV; index=False leaves out the DataFrame's
# row-index column so the file starts with the 'image' column.
submission2.to_csv("batch_normalized.csv",index = False)