# Fully connected NN on bottleneck features
Concatenate all preprocessed features from Inception, Xception, and InceptionResNet.

Then train a fully connected NN with 1 hidden layer, last layer with softmax.

Use 5 fold crossvalidation and model ensemble to produce the submission.

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import _pickle as pickle
from os import listdir
from os.path import join, isfile

import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
from keras import backend as K
K.set_session(session)

from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.regularizers import l1, l2, l1_l2

from sklearn.metrics import log_loss, accuracy_score

Using TensorFlow backend.


In [3]:
# Load labels and split to train/val
NUM_CLASSES = 120
SEED = 1993
NFOLDS = 5
np.random.seed(seed=SEED)
data_dir = '../data'

labels = pd.read_csv(join(data_dir, 'labels.csv'))
print('Number of all train images: {}'.format(len(labels)))
print("Train data has {} classes.".format(len(labels.groupby('breed').count())))
assert len(labels.groupby('breed').count()) == NUM_CLASSES, 'Number of classes in training set is not 120!'

sample_submission = pd.read_csv(join(data_dir, 'sample_submission.csv'))
print('Number of all test images: {}'.format(len(sample_submission)))

# Add a 'fold' column that contains numbers from 0 to NFOLDS
# Use it to split train/val during crossvalidation.
labels['fold'] = pd.Series(np.random.randint(0,NFOLDS,size=(labels.shape[0])))
labels_index = {label:i for i,label in enumerate(np.sort(np.unique(labels.breed)))}
labels_temp = [labels_index[label] for label in labels.breed]
ys = np.concatenate( (to_categorical(labels_temp ,num_classes=120), labels.as_matrix(columns=['fold'])), axis=1)
print('ys shape: {}'.format(ys.shape))

Number of all train images: 10222
Train data has 120 classes.
Number of all test images: 10357
ys shape: (10222, 121)


In [4]:
# Load train data
filename = data_dir + '//train//xs_bf_inception_v3'
print('Loading from {}'.format(filename))
with open(filename, 'rb') as fp:
    xs_bf_inception = pickle.load(fp)
print('xs_bf_inception shape: {} size: {:,}'.format(xs_bf_inception.shape, xs_bf_inception.size))

filename = data_dir + '//train//xs_bf_xception'
print('Loading from {}'.format(filename))
with open(filename, 'rb') as fp:
    xs_bf_xception = pickle.load(fp)
print('xs_bf_xception shape: {} size: {:,}'.format(xs_bf_xception.shape, xs_bf_xception.size))

filename = data_dir + '//train//xs_bf_irn'
print('Loading from {}'.format(filename))
with open(filename, 'rb') as fp:
    xs_bf_irn = pickle.load(fp)
print('xs_bf_irn shape: {} size: {:,}'.format(xs_bf_irn.shape, xs_bf_irn.size))

N_FEATURES = xs_bf_inception.shape[1] + xs_bf_xception.shape[1] + xs_bf_irn.shape[1] - 3

xs_bf = pd.DataFrame(data=np.concatenate(
    (labels.as_matrix(columns=['id', 'fold']),
     xs_bf_inception.as_matrix(columns=xs_bf_inception.columns[0:]),
     xs_bf_xception.as_matrix(columns=xs_bf_xception.columns[1:]),
     xs_bf_irn.as_matrix(columns=xs_bf_irn.columns[1:])),
     axis=1), columns=['id', 'fold', 'id2']+list(range(N_FEATURES)))
print('Concatenated features xs_bf shape: {} size: {:,}'.format(xs_bf.shape, xs_bf.size))

assert xs_bf.id.equals(xs_bf.id2), 'ID from labels has to be equal to ID from xs!'

Loading from ../data//train//xs_bf_inception_v3
xs_bf_inception shape: (10222, 2049) size: 20,944,878
Loading from ../data//train//xs_bf_xception
xs_bf_xception shape: (10222, 2049) size: 20,944,878
Loading from ../data//train//xs_bf_irn
xs_bf_irn shape: (10222, 1537) size: 15,711,214
Concatenated features xs_bf shape: (10222, 5635) size: 57,600,970


In [6]:
# Load test set data
filename = data_dir + '//test//xs_bf_inception'
print('Loading from {}'.format(filename))
with open(filename, 'rb') as fp:
    xs_bf_inception = pickle.load(fp)
print('xs_bf_inception shape: {} size: {:,}'.format(xs_bf_inception.shape, xs_bf_inception.size))

filename = data_dir + '//test//xs_bf_xception'
print('Loading from {}'.format(filename))
with open(filename, 'rb') as fp:
    xs_bf_xception = pickle.load(fp)
print('xs_bf_xception shape: {} size: {:,}'.format(xs_bf_xception.shape, xs_bf_xception.size))

filename = data_dir + '//test//xs_bf_irn'
print('Loading from {}'.format(filename))
with open(filename, 'rb') as fp:
    xs_bf_irn = pickle.load(fp)
print('xs_bf_irn shape: {} size: {:,}'.format(xs_bf_irn.shape, xs_bf_irn.size))

xs_test_bf = pd.DataFrame(data=np.concatenate(
    (xs_bf_inception.as_matrix(columns=xs_bf_inception.columns[0:]),
     xs_bf_xception.as_matrix(columns=xs_bf_xception.columns[1:]),
     xs_bf_irn.as_matrix(columns=xs_bf_irn.columns[1:])
    ), axis=1), columns=['id']+list(range(N_FEATURES)))
print('Concatenated features xs_test_bf shape: {} size: {:,}'.format(xs_test_bf.shape, xs_test_bf.size))

Loading from ../data//test//xs_bf_inception
xs_bf_inception shape: (10357, 2049) size: 21,221,493
Loading from ../data//test//xs_bf_xception
xs_bf_xception shape: (10357, 2049) size: 21,221,493
Loading from ../data//test//xs_bf_irn
xs_bf_irn shape: (10357, 1537) size: 15,918,709
Concatenated features xs_test_bf shape: (10357, 5633) size: 58,340,981


In [9]:
def setupModel():
    model = Sequential()

    model.add(Dense(200, input_shape=(N_FEATURES,), kernel_regularizer=l2(0.00001)))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    
    model.add(Dense(120, activation='softmax', kernel_regularizer=l2(0.0000001)))
    
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model

In [15]:
models = [{} for fold in range(NFOLDS)]
histories = [{} for fold in range(NFOLDS)]
scores = [{} for fold in range(NFOLDS)]
ys_train = np.zeros((NFOLDS, xs_bf.shape[0], NUM_CLASSES))
ys_test = np.zeros((NFOLDS, xs_test_bf.shape[0], NUM_CLASSES))

for fold in range(NFOLDS):
    print('\n### fold {} ###'.format(fold))
    
    # Split data
    x_tr = xs_bf[xs_bf.fold != fold].as_matrix(columns=xs_bf.columns[3:]).astype('float32')
    y_tr = ys[ys[:,-1] != fold][:,:-1].astype('float32')
    print('Training data x: {} y: {}'.format(x_tr.shape, y_tr.shape))
    x_val = xs_bf[xs_bf.fold == fold].as_matrix(columns=xs_bf.columns[3:]).astype('float32')
    y_val = ys[ys[:,-1] == fold][:,:-1].astype('float32')
    print('Validation data x: {} y: {}'.format(x_val.shape, y_val.shape))
    
    # Setup model
    models[fold] = setupModel()
    # models[fold].summary()
    
    # Fit the model
    histories[fold] = models[fold].fit(x_tr, y_tr,
          batch_size=1500,
          epochs=50,
          verbose=0,
          validation_data=(x_val, y_val),
          shuffle=True,
          callbacks=[EarlyStopping(monitor='val_loss', patience=2)])

    # Evaluate the model
    scores[fold] = models[fold].evaluate(x_val, y_val, verbose=1)
    print('Validation loss: {:.6f}'.format(scores[fold][0]))
    print('Validation accuracy: {:.3%}'.format(scores[fold][1]))
    
    # Predict on train set
    ys_train[fold] = models[fold].predict(
        xs_bf.as_matrix(columns=xs_bf.columns[3:]).astype('float32'),
        batch_size=512,
        verbose=0)
    print('Predicted ys_train[{}] shape: {} size: {:,}'.format(fold, ys_train[fold].shape, ys_train[fold].size))

    # Predict on test set
    ys_test[fold] = models[fold].predict(
        xs_test_bf.as_matrix(columns=xs_test_bf.columns[1:]).astype('float32'),
        batch_size=512,
        verbose=1)
    print('Predicted ys_test[{}] shape: {} size: {:,}'.format(fold, ys_test[fold].shape, ys_test[fold].size))
    
print('\nFertig')


### fold 0 ###
Training data x: (8138, 5632) y: (8138, 120)
Validation data x: (2084, 5632) y: (2084, 120)
Validation loss: 0.281237
Validation accuracy: 91.891%
Predicted ys_train[0] shape: (10222, 120) size: 1,226,640
Predicted ys_test[0] shape: (10357, 120) size: 1,242,840

### fold 1 ###
Training data x: (8213, 5632) y: (8213, 120)
Validation data x: (2009, 5632) y: (2009, 120)
Validation loss: 0.289977
Validation accuracy: 91.538%
Predicted ys_train[1] shape: (10222, 120) size: 1,226,640
Predicted ys_test[1] shape: (10357, 120) size: 1,242,840

### fold 2 ###
Training data x: (8118, 5632) y: (8118, 120)
Validation data x: (2104, 5632) y: (2104, 120)
Validation loss: 0.245679
Validation accuracy: 92.728%
Predicted ys_train[2] shape: (10222, 120) size: 1,226,640
Predicted ys_test[2] shape: (10357, 120) size: 1,242,840

### fold 3 ###
Training data x: (8138, 5632) y: (8138, 120)
Validation data x: (2084, 5632) y: (2084, 120)
Validation loss: 0.250923
Validation accuracy: 92.131%
Pre

In [38]:
print('Average loss and accuracy')
print(np.asarray(scores) )

# Only pick the best models
w = [0, 0, 1, 1 ,1]

# Average over the best models and evaluate on the whole train set
train_predictions = np.average(ys_train, axis=0, weights=w)
print('Prediction shape: {}'.format(train_predictions.shape))

print('Validation LogLoss {}'.format(log_loss( ys[:,:-1], train_predictions)))
print('Validation Accuracy {}'.format(accuracy_score(
    (ys[:,:-1] * range(NUM_CLASSES)).sum(axis=1),
    np.argmax(train_predictions, axis=1) )))

Average loss and accuracy
[[ 0.28123696  0.91890595]
 [ 0.28997732  0.91538079]
 [ 0.24567856  0.92728137]
 [ 0.25092281  0.92130518]
 [ 0.24460087  0.92529624]]
Prediction shape: (10222, 120)
Validation LogLoss 0.14085716095582876
Validation Accuracy 0.9551946781451771


In [39]:
# Average over all models for test set
predictions = np.average(ys_test, axis=0, weights=w)
print('Prediction shape: {}'.format(predictions.shape))

Prediction shape: (10357, 120)


In [40]:
# Save submission
for i, breed in enumerate(sample_submission.columns[1:]):
    sample_submission[breed] = predictions[:, labels_index[breed]]
    
sample_submission.to_csv('..//submissions//sub_crossval_nn_inc_xc_irn_l2.csv', index=None)