In [1]:
#%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

from time import time

from sklearn.externals import joblib
from sklearn import manifold

In [2]:
# Load the pickled training set from disk.
# NOTE(review): joblib.load unpickles arbitrary Python objects -- only point it at trusted files.
train_set_path = './dataset/train_set_wlabels_it30.pkl'
train_set = joblib.load(train_set_path)

In [3]:
# Convert each descriptor channel stored in train_set.feature_dict into its own NumPy array.
(mbhx_feats, mbhy_feats, traj_feats, hog_feats, hof_feats) = [
    np.array(train_set.feature_dict[channel])
    for channel in ('MBHx', 'MBHy', 'Trajectory', 'HOG', 'HOF')
]

In [5]:
#try with only trajectory features
# Embed the trajectory descriptors into 2-D with t-SNE (PCA initialisation) and time the fit.
# random_state pins the stochastic parts of the optimisation so the embedding is repeatable.
manifold_TSNE = manifold.TSNE(n_components=2, init='pca', random_state=0)
t0 = time()
Y_TSNE_traj = manifold_TSNE.fit_transform(traj_feats)
t1 = time()
# Single-argument print(...) is valid on both Python 2 and Python 3.
print('t-SNE finished in {0} seconds. Transformed data of size:{1} to size:{2}'
      .format(t1-t0, traj_feats.shape, Y_TSNE_traj.shape))

t-SNE finished in 37.9910240173 seconds. Transformed data of size:(1206, 4000) to size:(1206, 2)


In [8]:
#plot data
#set up the plot
def scatter_vis(Y, labels, axis_fontsize = 14, title="", title_fontsize = 16):
    """Scatter-plot a 2-D embedding with points coloured by their label value.

    Parameters
    ----------
    Y : 2-D array
        Embedding to plot; column 0 is used as x and column 1 as y.
    labels : sequence
        One value per row of ``Y``; mapped to colours through the
        ``coolwarm`` colormap and summarised in the colorbar.
    axis_fontsize : int, optional
        Font size for the axis labels and the colorbar label.
    title : str, optional
        Figure title.
    title_fontsize : int, optional
        Font size for the title.

    Returns
    -------
    (fig, ax)
        The created matplotlib figure and axes, so callers can tweak or save them.
    """
    fig, ax = plt.subplots(figsize=(16,12))

    #scatter plot -- drawn on `ax` explicitly rather than through pyplot's implicit current axes
    points = ax.scatter(Y[:,0], Y[:,1], c=labels, s=100, alpha=.7, cmap=cm.coolwarm)

    #colorbar
    cb = fig.colorbar(points, ax=ax)
    cb.set_label('perceptual load (TrueSkill mu)', labelpad=20, fontdict={'fontsize': axis_fontsize})

    #axes
    ax.set_xlabel('t-SNE component 1', fontdict={'fontsize': axis_fontsize})
    ax.set_ylabel('t-SNE component 2', fontdict={'fontsize': axis_fontsize})
    # Positional True works on every matplotlib release; the old `b=` keyword was
    # renamed to `visible=` and later removed.
    ax.grid(True)
    # Hide tick labels: the scatter only conveys relative layout here.
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.set_title(title, fontdict={'fontsize': title_fontsize})

    return fig, ax

In [21]:
# `labels` is otherwise only assigned in a later cell, so build it here; without this
# the cell raises NameError on a fresh top-to-bottom run.
labels = np.array(train_set.feature_dict['Label'])
fig, ax = scatter_vis(Y_TSNE_traj, labels, title='Trajectory features only')
plt.show()

In [None]:
#construct motion features
# Concatenate the MBHx, MBHy and trajectory descriptors column-wise into one feature matrix.
motion_feats = np.hstack((mbhx_feats, mbhy_feats, traj_feats))
# Single-argument print(...) is valid on both Python 2 and Python 3.
print('examples x features shape: {0}'.format(motion_feats.shape))
labels = np.array(train_set.feature_dict['Label'])
print('labels shape: {0}'.format(labels.shape))

In [10]:
#transform into lower dimensional space for visualisation
# Embed the combined motion descriptors into 2-D with t-SNE (PCA initialisation) and time the fit.
# random_state pins the stochastic parts of the optimisation so the embedding is repeatable.
manifold_TSNE = manifold.TSNE(n_components=2, init='pca', random_state=0)
t0 = time()
Y_TSNE_motion = manifold_TSNE.fit_transform(motion_feats)
t1 = time()
# Single-argument print(...) is valid on both Python 2 and Python 3.
print('t-SNE finished in {0} seconds. Transformed data of size:{1} to size:{2}'
      .format(t1-t0, motion_feats.shape, Y_TSNE_motion.shape))

t-SNE finished in 38.6489510536 seconds. Transformed data of size:(1206, 12000) to size:(1206, 2)


In [None]:
# Show the 2-D embedding of the combined trajectory + MBH descriptors, coloured by label.
motion_title = 'Trajectory + MBH features'
fig, ax = scatter_vis(Y=Y_TSNE_motion, labels=labels, title=motion_title)
plt.show()

In [12]:
#try with full features (20k)
# Stack every descriptor channel column-wise, then embed into 2-D with t-SNE and time the fit.
full_feats = np.hstack((traj_feats, mbhx_feats, mbhy_feats, hog_feats, hof_feats))
# random_state pins the stochastic parts of the optimisation so the embedding is repeatable.
manifold_TSNE = manifold.TSNE(n_components=2, init='pca', random_state=0)
t0 = time()
Y_TSNE_full = manifold_TSNE.fit_transform(full_feats)
t1 = time()
# Single-argument print(...) is valid on both Python 2 and Python 3.
print('t-SNE finished in {0} seconds. Transformed data of size:{1} to size:{2}'
      .format(t1-t0, full_feats.shape, Y_TSNE_full.shape))

t-SNE finished in 41.1924688816 seconds. Transformed data of size:(1206, 20000) to size:(1206, 2)


In [24]:
# Show the 2-D embedding built from every descriptor channel, coloured by label.
full_title = 'Trajectory + MBH + HOG + HOF features (all)'
fig, ax = scatter_vis(Y=Y_TSNE_full, labels=labels, title=full_title)
plt.show()

In [None]:
#write csv for visualisation
# Each row: first element of the video name, its label, and the two t-SNE coordinates
# from the motion-feature embedding.
import csv
import sys

# The csv module expects a binary handle on Python 2 but a text handle opened with
# newline='' on Python 3; pick the right one so the export works on either.
if sys.version_info[0] < 3:
    csv_file = open('motion_feats_mapping.csv', 'wb')
else:
    csv_file = open('motion_feats_mapping.csv', 'w', newline='')

with csv_file as f:
    cw = csv.writer(f)
    video_names = train_set.feature_dict['Video_Name']
    video_labels = train_set.feature_dict['Label']
    for ii, name in enumerate(video_names):
        # The original call was missing its closing parenthesis (SyntaxError).
        cw.writerow([name[0], video_labels[ii], Y_TSNE_motion[ii][0], Y_TSNE_motion[ii][1]])