In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import sys
import caffe
import os
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
import pandas as pd
from pandas import Series, DataFrame
# display plots in this notebook
%matplotlib inline
# set display defaults
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

In [None]:
from helper import *

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    Parameters
    ----------
    cm : 2-D numpy array
        Confusion matrix. Rows are labelled as true labels and columns as
        predicted labels on the plot.
    classes : sequence
        Tick labels, one per row/column of `cm`.
    normalize : bool, optional
        When True every row is divided by its sum before printing/plotting.
        Rows whose sum is zero are left as all zeros instead of becoming NaN.
    title : str, optional
        Title drawn above the matrix.
    cmap : matplotlib colormap, optional
        Colormap handed to `plt.imshow`.

    Returns
    -------
    None. The matrix is printed and drawn as a side effect.
    """
    # Imported here so the function works even when no other notebook cell
    # has imported itertools yet.
    import itertools

    if normalize:
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        # A class without any true sample has a zero row sum; divide by 1
        # there so the row stays 0 rather than turning into NaN.
        safe_row_sums = np.where(row_sums == 0, 1, row_sums)
        cm = cm.astype('float') / safe_row_sums
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    # Cells above half of the maximum get the alternate text colour.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, '%.2f'%cm[i, j],
                 horizontalalignment="center", 
                 color="gray" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account.
    plt.tight_layout()

In [None]:
# Tag embedded in the per-video CSV file names read by load_features and in
# the names of the figures saved further down.
features_name = 'siamese'
# Column count used to size the empty feature accumulators.
# NOTE(review): presumably three concatenated 4096-d descriptors -- confirm.
n_features = 4096*3
# Number of repetitions of each fold letter when the fold column is built.
n_samples_per_fold = 1
# Dataset identifier forwarded to load_list and embedded in figure names.
dataset = 3

In [None]:
def load_features(video):
    """
    Read the feature and label CSV files stored for one video.

    The files are looked up under ./data/ArrowDataAll/<video>/ and are named
    after the module-level `features_name`.

    Returns a tuple (X, y, names): X and y are the arrays parsed from the
    feature and label files, and names is a list holding `video` once per
    entry of y.
    """
    video_dir = os.path.join('./data/ArrowDataAll', video)
    features_path = os.path.join(video_dir, 'features-' + features_name + '.csv')
    labels_path = os.path.join(video_dir, 'labels-' + features_name + '.csv')

    with open(features_path, 'r') as features_file:
        X = np.loadtxt(features_file, delimiter=',')
    with open(labels_path, 'r') as labels_file:
        y = np.loadtxt(labels_file, delimiter=',')

    return X, y, [video] * len(y)

In [None]:
# load_list comes from the star-import of `helper`; it is expected to return
# the train and test video name lists for the selected dataset.
# NOTE(review): exact return types are not visible here -- confirm in helper.
train_list, test_list = load_list('./data', dataset = dataset)

In [None]:
# Rows taken from every video's feature/label arrays (four samples per video).
sample_rows = [0, 10, 20, 30]

# Collect per-video pieces and concatenate once at the end; growing the
# arrays with np.append inside the loop copies everything on each iteration.
# The empty seeds keep the (0, n_features) / (0,) shapes for an empty list and
# make a video with the wrong number of feature columns fail loudly.
X_train_parts = [np.empty((0, n_features))]
y_train_parts = [np.empty(0)]
video_train = []
for position, video in enumerate(train_list):
    # Progress counter every 10 videos (1-based). enumerate avoids the linear
    # list.index lookup, which also reports the wrong position for duplicates.
    if position % 10 == 0:
        print(position + 1)
    X, y, video_names_with_multitude = load_features(video)
    X_train_parts.append(X[sample_rows])
    y_train_parts.append(y[sample_rows])
    video_train.extend(video_names_with_multitude[row] for row in sample_rows)
X_train = np.concatenate(X_train_parts, axis = 0)
y_train = np.concatenate(y_train_parts, axis = 0)

In [None]:
# Rows taken from every video's feature/label arrays (four samples per video).
sample_rows = [0, 10, 20, 30]

# Collect per-video pieces and concatenate once at the end; growing the
# arrays with np.append inside the loop copies everything on each iteration.
# The empty seeds keep the (0, n_features) / (0,) shapes for an empty list and
# make a video with the wrong number of feature columns fail loudly.
X_test_parts = [np.empty((0, n_features))]
y_test_parts = [np.empty(0)]
video_test = []
for position, video in enumerate(test_list):
    # Progress counter every 10 videos (1-based). enumerate avoids the linear
    # list.index lookup, which also reports the wrong position for duplicates.
    if position % 10 == 0:
        print(position + 1)
    X, y, video_names_with_multitude = load_features(video)
    X_test_parts.append(X[sample_rows])
    y_test_parts.append(y[sample_rows])
    video_test.extend(video_names_with_multitude[row] for row in sample_rows)
X_test = np.concatenate(X_test_parts, axis = 0)
y_test = np.concatenate(y_test_parts, axis = 0)

## Feature importance

In [None]:
# Random forest fitted only to inspect per-feature importances.
# The seed is fixed so the saved figure is reproducible between runs.
clf = RandomForestClassifier(n_jobs = -1, n_estimators=100, random_state=0)
clf.fit(X_train, y_train)
plt.plot(clf.feature_importances_)
# Label the axes so the exported figure is readable on its own.
plt.xlabel('Feature index')
plt.ylabel('Importance')
plt.savefig('../report/figs/importance-f' + str(features_name) + '-d' + str(dataset) + '.pdf')

## Fitting

In [None]:
# Linear-kernel SVM trained on the sampled training rows. This rebinds `clf`,
# replacing the random forest fitted in the previous section.
clf = SVC(C = 1, kernel = 'linear')
clf.fit(X_train, y_train)

In [None]:
# Per-sample score on the test rows. The parenthesised form prints the same
# text under Python 2 and is also valid Python 3.
print(clf.score(X_test, y_test))

In [None]:
# Per-sample predictions of the fitted classifier on the test rows.
y_predict = clf.predict(X_test)

In [None]:
# Recode label 0 as -1 in both arrays so the fold sums computed later are signed.
# Both arrays are modified in place: re-running the earlier score cell after
# this one compares the classifier output against the recoded y_test.
y_predict[y_predict == 0] = -1
y_test[y_test == 0] = -1

In [None]:
# One row per sampled test frame: true label, predicted label, source video.
df = DataFrame(
    {'true': y_test, 'predict': y_predict, 'video': video_test},
    columns=['true', 'predict', 'video'],
)

In [None]:
# One group of fold letters (A, B, C, D) per test video. The repeat count
# follows the test list instead of a hard-coded 60, so the fold column cannot
# silently fall out of step with the rows when the list size changes.
fold = [['A']*n_samples_per_fold + ['B']*n_samples_per_fold + ['C']*n_samples_per_fold + ['D']*n_samples_per_fold]*len(test_list)

In [None]:
import itertools
# Flatten the per-video fold groups into one label per row, then attach them
# to the frame (pandas aligns the Series on the row index).
fold = list(itertools.chain.from_iterable(fold))
df['fold'] = Series(fold)

In [None]:
# Average the true and predicted labels per (video, fold), then pivot the
# fold level into columns: one row per video, columns ('true'|'predict', fold).
dfg = df.groupby(['video', 'fold']).mean().unstack()

In [None]:
# Per-video vote: folds A and B count positively, folds C and D negatively.
# Ground truth uses a strict > 0, the prediction uses >= 0 (a zero sum counts as True).
true_by_fold = dfg['true']
dfg['gt'] = (true_by_fold['A'] + true_by_fold['B'] - true_by_fold['C'] - true_by_fold['D']) > 0

predict_by_fold = dfg['predict']
dfg['my'] = (predict_by_fold['A'] + predict_by_fold['B'] - predict_by_fold['C'] - predict_by_fold['D']) >= 0

In [None]:
# True where the per-video prediction agrees with the per-video ground truth.
dfg['good'] = dfg['gt'] == dfg['my']

In [None]:
# Show the per-video table (fold means, votes and agreement) for inspection.
dfg

In [None]:
# Fraction of videos whose predicted vote matches the ground-truth vote.
dfg['good'].mean()

In [None]:
# Pin the label order (False first, then True) so the matrix is always 2x2,
# even when only one class occurs in the data. This matches the two class
# names passed to the plot in the next cell.
cm = confusion_matrix(dfg['gt'], dfg['my'], labels=[False, True])

In [None]:
# Row-normalised confusion matrix for the per-video decision; the title now
# matches normalize=True (it previously read "Unormalized").
plot_confusion_matrix(cm, classes=['Backward', 'Forward'], normalize=True,
                      title='Normalized confusion matrix')
plt.savefig('../report/figs/cm-' + str(features_name) + '-d' + str(dataset) + '.pdf')