In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.callbacks import ModelCheckpoint,EarlyStopping
from keras.preprocessing.image import ImageDataGenerator


Using TensorFlow backend.


In [3]:
from keras.utils import np_utils
from keras.models import Sequential
from keras.callbacks import EarlyStopping, History, ModelCheckpoint
from keras.layers.core import Flatten, Dense, Dropout, Reshape, Lambda
from keras.layers.normalization import BatchNormalization


In [4]:
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

In [5]:
import numpy as np

In [19]:
train_features = np.load('train_preprocesed.npy')
valid_features = np.load('valid_preprocessed.npy')

In [8]:
train_dir = "new_train/"
valid_dir = "new_valid/"

In [9]:
classes = os.listdir(train_dir)

In [10]:
# Get the labels

train_labels = []
for c in classes:
    l = [c]*len(os.listdir(train_dir+c+'/'))
    train_labels.extend(l)
    

In [11]:
len(train_labels)

3019

In [12]:
valid_labels = []

for c in classes:
    l = [c]*len(os.listdir(valid_dir+c+'/'))
    valid_labels.extend(l)

In [13]:
onehot_train = to_categorical(LabelEncoder().fit_transform(train_labels))

In [14]:
onehot_valid = to_categorical(LabelEncoder().fit_transform(valid_labels))

In [15]:
vgg16_base = VGG16(include_top=False, weights='imagenet',
                    input_tensor=None, input_shape=(150, 150,3))
# Note that the preprocessing of InceptionV3 is:
# (x / 255 - 0.5) x 2

print('Adding new layers...')
output = vgg16_base.get_layer(index = -1).output  
output = Flatten()(output)
# let's add a fully-connected layer
output = Dense(4096,activation = "relu")(output)
output = BatchNormalization()(output)
output = Dropout(0.5)(output)
output = Dense(512,activation = "relu")(output)
output = BatchNormalization()(output)
output = Dropout(0.5)(output)
# and a logistic layer -- let's say we have 200 classes
output = Dense(8, activation='softmax')(output)


vgg16_model = Model(vgg16_base.input, output)
#InceptionV3_model.summary()


Adding new layers...


In [16]:
for layer in vgg16_model.layers[:19]:
    layer.trainable = False

In [17]:

vgg16_model.compile(optimizer="adam",loss="categorical_crossentropy",metrics =["accuracy"])

In [18]:
vgg16_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 150, 150, 3)   0                                            
____________________________________________________________________________________________________
block1_conv1 (Convolution2D)     (None, 150, 150, 64)  1792        input_1[0][0]                    
____________________________________________________________________________________________________
block1_conv2 (Convolution2D)     (None, 150, 150, 64)  36928       block1_conv1[0][0]               
____________________________________________________________________________________________________
block1_pool (MaxPooling2D)       (None, 75, 75, 64)    0           block1_conv2[0][0]               
___________________________________________________________________________________________

In [17]:
train_datagen = ImageDataGenerator(
        shear_range=0.1,
        zoom_range=0.1,
        rotation_range=10.,
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True)


val_datagen = ImageDataGenerator()



In [18]:
callbacks = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto')        
# autosave best Model
best_model_file = "./data_augmented_weights.h5"
best_model = ModelCheckpoint(best_model_file, monitor='val_acc', verbose = 1, save_best_only = True)

In [39]:
history = vgg16_model.fit_generator(train_datagen.flow(train_features, onehot_train, batch_size=10), nb_epoch=5,
              samples_per_epoch = 3019,                     
              validation_data=val_datagen.flow(valid_features,onehot_valid,batch_size=10,shuffle=False),
                                    nb_val_samples=758,callbacks = [callbacks,best_model])


Epoch 1/10


KeyboardInterrupt: 

In [19]:
#model.load_weights("batch_normalized_weights.h5")
vgg16_model.load_weights("data_augmented_weights.h5")

In [None]:
# summarize history for accuracy
plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['acc']); plt.plot(history.history['val_acc']);
plt.title('model accuracy'); plt.ylabel('accuracy');
plt.xlabel('epoch'); plt.legend(['train', 'valid'], loc='upper left');

# summarize history for loss
plt.subplot(1, 2, 2)
plt.plot(history.history['loss']); plt.plot(history.history['val_loss']);
plt.title('model loss'); plt.ylabel('loss');
plt.xlabel('epoch'); plt.legend(['train', 'valid'], loc='upper left');
plt.show()

In [21]:
test_features = np.load("test_preprocessed.npy")

In [23]:
test_preds = vgg16_model.predict(test_features, batch_size=5, verbose=1)



In [24]:
test_preds[0:5]

array([[  1.23179605e-04,   1.41080118e-05,   3.51649040e-04,
          1.29593900e-04,   9.99090791e-01,   4.37980634e-05,
          1.55789421e-05,   2.31271217e-04],
       [  1.87484369e-01,   3.07487875e-01,   1.74674299e-02,
          2.54575647e-02,   8.45021103e-03,   3.89249623e-01,
          6.22021854e-02,   2.20064702e-03],
       [  9.50872302e-01,   2.27128789e-02,   1.28889267e-04,
          1.37920448e-04,   2.54034036e-04,   2.54313592e-02,
          2.58546585e-04,   2.03995383e-04],
       [  3.44736487e-01,   2.89495615e-03,   6.61096943e-04,
          7.46859354e-04,   1.06167980e-04,   6.49582028e-01,
          7.74195651e-04,   4.98127018e-04],
       [  5.38185894e-01,   3.59348929e-03,   6.22900343e-03,
          1.30366189e-02,   1.78193871e-03,   3.32544744e-02,
          2.60934174e-01,   1.42984435e-01]], dtype=float32)

In [25]:
submission1 = pd.DataFrame(test_preds, columns= os.listdir(train_dir))
test_files = os.listdir("test_stg1/test_stg1/")
submission1.insert(0, 'image', test_files)
submission1.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,0.000123,1.4e-05,0.000352,0.00013,0.999091,4.4e-05,1.6e-05,0.000231
1,img_00007.jpg,0.187484,0.307488,0.017467,0.025458,0.00845,0.38925,0.062202,0.002201
2,img_00009.jpg,0.950872,0.022713,0.000129,0.000138,0.000254,0.025431,0.000259,0.000204
3,img_00018.jpg,0.344736,0.002895,0.000661,0.000747,0.000106,0.649582,0.000774,0.000498
4,img_00027.jpg,0.538186,0.003593,0.006229,0.013037,0.001782,0.033254,0.260934,0.142984


In [26]:
clipped_preds = np.clip(test_preds,(1-0.82)/7,0.82)

submission2 = pd.DataFrame(clipped_preds, columns= os.listdir("train/train/"))
submission2.insert(0, 'image', test_files)
submission2.head()

Unnamed: 0,image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
0,img_00005.jpg,0.025714,0.025714,0.025714,0.025714,0.82,0.025714,0.025714,0.025714
1,img_00007.jpg,0.187484,0.307488,0.025714,0.025714,0.025714,0.38925,0.062202,0.025714
2,img_00009.jpg,0.82,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714,0.025714
3,img_00018.jpg,0.344736,0.025714,0.025714,0.025714,0.025714,0.649582,0.025714,0.025714
4,img_00027.jpg,0.538186,0.025714,0.025714,0.025714,0.025714,0.033254,0.260934,0.142984


In [27]:
submission2.to_csv("data_augmented_batch_normalized.csv",index = False)

In [28]:
valid_preds = vgg16_model.predict_classes(valid_features, batch_size=5, verbose=1)

AttributeError: 'Model' object has no attribute 'predict_classes'