In [7]:
import os
import pandas as pd
import matplotlib.pyplot as plt

In [8]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.callbacks import EarlyStopping, History, ModelCheckpoint
from keras.layers.core import Flatten, Dense, Dropout, Reshape, Lambda
from keras.layers.normalization import BatchNormalization


In [9]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

In [10]:
import numpy as np

In [11]:
# Load the precomputed feature arrays for the training and validation splits.
train_features, valid_features = (
    np.load(feature_path)
    for feature_path in ('train_features.npy', 'valid_features.npy')
)

In [12]:
# Directories holding the training / validation images, one sub-folder per class.
train_dir, valid_dir = "new_train/", "new_valid/"

In [13]:
# Class names are the sub-directory names of train_dir.
# os.listdir returns entries in arbitrary order and also lists stray files
# (e.g. .DS_Store), so keep only directories and sort them to get a
# deterministic order that matches LabelEncoder's sorted class order.
classes = sorted(
    entry
    for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)

In [14]:
# Build the training labels: each class name repeated once per entry in its folder,
# with classes visited in the order given by `classes`.
train_labels = [
    class_name
    for class_name in classes
    for _ in os.listdir(train_dir + class_name + '/')
]
    

In [15]:
# Build the validation labels the same way: one class name per entry in each
# class folder under valid_dir, visiting classes in the order given by `classes`.
valid_labels = [
    class_name
    for class_name in classes
    for _ in os.listdir(valid_dir + class_name + '/')
]

In [16]:
# Map the training class names to integer ids, then expand them to one-hot rows.
train_label_ids = LabelEncoder().fit_transform(train_labels)
onehot_train = to_categorical(train_label_ids)

In [17]:
# Encode the validation labels with the mapping learned from the TRAINING labels.
# Fitting a separate encoder on the validation labels would shift the integer ids
# (and shrink the one-hot width) whenever a class is absent from the validation split.
# transform() raises ValueError if a validation label was never seen in training.
valid_label_ids = LabelEncoder().fit(train_labels).transform(valid_labels)
# Pass the class count positionally so the one-hot width always matches training
# (the keyword name of this argument differs between Keras versions).
onehot_valid = to_categorical(valid_label_ids, len(set(train_labels)))

In [18]:
# Fully connected classifier on top of the precomputed features:
# flatten -> 4096 ReLU -> dropout 0.2 -> 512 ReLU -> dropout 0.3 -> 8-way softmax.
# (BatchNormalization after each hidden Dense layer is currently disabled.)
model = Sequential([
    Flatten(input_shape=train_features.shape[1:]),
    Dense(4096, activation='relu'),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.3),
    Dense(8, activation='softmax'),
])

In [19]:

# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy as the reported metric.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [20]:
# Stop training as soon as the validation loss fails to improve for one epoch.
callbacks = EarlyStopping(monitor='val_loss', patience=1, verbose=1, mode='auto')
# Autosave the best model seen so far.
# The path matches the weights file that is loaded after training; the previous
# "batch_normalized_..." name did not match the model (BatchNormalization is
# commented out) and was never the file that got reloaded.
best_model_file = "./fully_connected_dropout_weights.h5"
# NOTE(review): the checkpoint tracks 'val_acc' while early stopping tracks
# 'val_loss' -- confirm that monitoring two different quantities is intended.
best_model = ModelCheckpoint(best_model_file, monitor='val_acc', verbose=1, save_best_only=True)

In [16]:
# Train for up to 10 epochs in mini-batches of 10, validating on the held-out split
# after each epoch; early stopping and best-model checkpointing run as callbacks.
history = model.fit(
    train_features,
    onehot_train,
    batch_size=10,
    nb_epoch=10,
    validation_data=(valid_features, onehot_valid),
    shuffle=True,
    callbacks=[callbacks, best_model],
)

Train on 3019 samples, validate on 758 samples
Epoch 1/10


KeyboardInterrupt: 

In [21]:
# Restore saved weights into the model before predicting.
weights_path = "fully_connected_dropout_weights.h5"
model.load_weights(weights_path)

In [None]:
# Plot the training curves side by side: accuracy on the left, loss on the right,
# each with the training and validation series.
plt.figure(figsize=(15, 5))
panels = [
    ('acc', 'val_acc', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'model loss', 'loss'),
]
for position, (train_key, valid_key, heading, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[valid_key])
    plt.title(heading)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'valid'], loc='upper left')
plt.show()

In [22]:
# Load the precomputed feature array for the test images.
test_features_path = "test_features.npy"
test_features = np.load(test_features_path)

In [23]:
# Predict per-class probabilities for every test sample (with a progress bar).
test_preds = model.predict_proba(
    test_features,
    verbose=1,
)



In [24]:
# Peek at the first five prediction rows.
test_preds[:5]

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]], dtype=float32)

In [25]:
# Build the raw submission table: one row per test image, one probability column per class.
# The training targets were one-hot encoded via LabelEncoder, so prediction column i
# corresponds to the i-th class in the encoder's (sorted) class list. Take the column
# names from there instead of os.listdir, whose order is arbitrary.
class_columns = [str(name) for name in LabelEncoder().fit(train_labels).classes_]
submission1 = pd.DataFrame(test_preds, columns=class_columns)
# Sort the filenames so the row order is deterministic across machines.
# NOTE(review): assumes the rows of test_features.npy follow sorted filename order
# (as in the recorded output) -- confirm against the feature-extraction step.
test_files = sorted(os.listdir("test_stg1/test_stg1/"))
submission1.insert(0, 'image', test_files)
submission1.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,img_00007.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,img_00009.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,img_00018.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,img_00027.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [26]:
# Clip the predictions away from 0 and 1 so a confident wrong answer is not
# penalised without bound by log loss. The cap is 0.82; the floor spreads the
# remaining probability mass evenly over the other classes.
max_prob = 0.82
min_prob = (1 - max_prob) / (test_preds.shape[1] - 1)
clipped_preds = np.clip(test_preds, min_prob, max_prob)

# Name the columns from the same label encoding used for the training targets,
# rather than listing a different directory ("train/train/") in arbitrary order.
clipped_columns = [str(name) for name in LabelEncoder().fit(train_labels).classes_]
submission2 = pd.DataFrame(clipped_preds, columns=clipped_columns)
submission2.insert(0, 'image', test_files)
submission2.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.82
1,img_00007.jpg,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.82
2,img_00009.jpg,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.82
3,img_00018.jpg,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.82
4,img_00027.jpg,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.82


In [27]:
# Write the clipped submission to disk without the DataFrame index column.
submission_path = "fully_connected_dropout.csv"
submission2.to_csv(submission_path, index=False)