In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import _pickle as pickle
from os.path import join

import tensorflow as tf
# Build a TensorFlow session explicitly from a default-constructed ConfigProto
# (no options are overridden here) and register it with the Keras backend via
# K.set_session, so the session object stays reachable as `session`.
# NOTE(review): tf.ConfigProto / tf.Session are TensorFlow 1.x style APIs —
# confirm the pinned TensorFlow version before upgrading.
config = tf.ConfigProto()
session = tf.Session(config=config)
from keras import backend as K
K.set_session(session)

from keras.utils import to_categorical

Using TensorFlow backend.


In [2]:
# Load labels and split to train/val
NUM_CLASSES = 120
# Seed NumPy's global RNG before sampling so the validation split below is
# intended to be repeatable on a fresh Restart & Run All.
np.random.seed(seed=2017)
data_dir = '../data'

labels = pd.read_csv(join(data_dir, 'labels.csv'))

# Per-breed hold-out: sample 20% of the rows of every breed for validation;
# every row whose id was not sampled goes to the training set.
l_val = labels.groupby('breed').apply(pd.DataFrame.sample, frac=0.2).reset_index(drop=True)
l_tr = labels.loc[~labels['id'].isin(l_val['id'])]
print('l_tr shape: {}'.format(l_tr.shape))
print('l_val shape: {}'.format(l_val.shape))

# Every class must still be represented in the training set after the split.
assert NUM_CLASSES == l_tr['breed'].nunique(), 'Incorrect number of classes in training set!'

# Map each breed name to an integer class id (sorted order of the breeds seen
# in training), then one-hot encode both splits with that same mapping.
l_tr_index = {label: i for i, label in enumerate(np.unique(l_tr.breed))}
l_tr_temp = [l_tr_index[label] for label in l_tr.breed]
l_val_temp = [l_tr_index[label] for label in l_val.breed]
# Use the NUM_CLASSES constant (not a repeated literal) so the one-hot width
# cannot drift away from the value checked by the assertion above.
y_tr = to_categorical(l_tr_temp, num_classes=NUM_CLASSES)
y_val = to_categorical(l_val_temp, num_classes=NUM_CLASSES)

print('y_tr shape: {}'.format(y_tr.shape))
print('y_val shape: {}'.format(y_val.shape))

l_tr shape: (8185, 2)
l_val shape: (2037, 2)
y_tr shape: (8185, 120)
y_val shape: (2037, 120)


In [3]:
def _load_features(path, label):
    """Unpickle one bottleneck-feature table and print its shape and size.

    Args:
        path: Location of the pickled file to read.
        label: Name used in the printed shape/size report.

    Returns:
        The unpickled object; it must expose ``.shape`` and ``.size``.
    """
    print('Loading from {}'.format(path))
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load feature files that were produced by this project.
    with open(path, 'rb') as fp:
        features = pickle.load(fp)
    print('{} shape: {} size: {:,}'.format(label, features.shape, features.size))
    return features


# One pickled table per pretrained network; `filename` is kept as a plain
# variable so the paths are identical to the ones reported in the output.
filename = data_dir + '//train//xs_bf_inception_v3'
xs_bf_inception = _load_features(filename, 'xs_bf_inception')

filename = data_dir + '//train//xs_bf_xception'
xs_bf_xception = _load_features(filename, 'xs_bf_xception')

filename = data_dir + '//train//xs_bf_vgg16'
xs_bf_vgg16 = _load_features(filename, 'xs_bf_vgg16')

Loading from ../data//train//xs_bf_inception_v3
xs_bf_inception shape: (10222, 2049) size: 20,944,878
Loading from ../data//train//xs_bf_xception
xs_bf_xception shape: (10222, 2049) size: 20,944,878
Loading from ../data//train//xs_bf_vgg16
xs_bf_vgg16 shape: (10222, 513) size: 5,243,886


In [4]:
# Stack the feature columns of the three tables side by side. The first column
# of each table is skipped (presumably an id column — confirm against the code
# that wrote these files).
# `.iloc[:, 1:].values` replaces DataFrame.as_matrix, which was deprecated in
# pandas 0.23 and removed in pandas 1.0; `.values` works on old and new pandas.
feature_tables = (xs_bf_inception, xs_bf_xception, xs_bf_vgg16)
xs = np.concatenate(
    [table.iloc[:, 1:].values for table in feature_tables],
    axis=1)

expected_width = xs_bf_inception.shape[1]-1 + xs_bf_xception.shape[1]-1 + xs_bf_vgg16.shape[1]-1
print('Concatenated shape {} should be {} = {} + {} + {}'.format(
    xs.shape[1],
    expected_width,
    xs_bf_inception.shape[1]-1,
    xs_bf_xception.shape[1]-1,
    xs_bf_vgg16.shape[1]-1))
# Enforce the "should be" from the message instead of relying on a visual check.
assert xs.shape[1] == expected_width, 'Unexpected number of concatenated features!'

# Columns are the variables (features) and rows the observations, hence
# rowvar=False; this is equivalent to np.corrcoef(xs.T).
correlation_matrix = np.corrcoef(xs, rowvar=False)
print('correlation_matrix shape {}'.format(correlation_matrix.shape))

Concatenated shape 4608 should be 4608 = 2048 + 2048 + 512
correlation_matrix shape (4608, 4608)


In [5]:
# Display very big correlations
BIG_CORRELATION_THRESHOLD = 0.9


def _find_big_correlations(matrix, threshold):
    """Collect the strongly correlated pairs above the diagonal of ``matrix``.

    Args:
        matrix: 2-D NumPy array of correlation coefficients.
        threshold: Minimum absolute correlation for a pair to be reported.

    Returns:
        A list of ``[correlation, i, j]`` entries with ``i < j``, ordered by
        ``i`` and then ``j`` (the order a nested row/column scan would give).
    """
    # Comparisons against NaN are False, so undefined correlations are skipped;
    # errstate only silences the warning some NumPy versions emit for them.
    with np.errstate(invalid='ignore'):
        # k=1 keeps only entries strictly above the diagonal, so every
        # unordered pair is considered once and self-correlations are ignored.
        strong = np.triu(np.abs(matrix) >= threshold, k=1)
    # np.argwhere walks the mask in row-major order.
    return [[matrix[row, col], int(row), int(col)] for row, col in np.argwhere(strong)]


big_cors = _find_big_correlations(correlation_matrix, BIG_CORRELATION_THRESHOLD)
for cor, i, j in big_cors:
    print('Correlation {} at [{}][{}]'.format(cor, i, j))

print('Found {:.0f} pairs of higly correlated bottleneck features'.format(len(big_cors)))

Correlation 0.9394352834738481 at [388][463]
Correlation 0.9374163104213367 at [393][726]
Correlation 0.926437519461309 at [536][615]
Found 3 pairs of higly correlated bottleneck features
