In [1]:
import os

# The working directory must be the repository root (BS-thesis): later cells use
# relative paths such as './data/...' and 'cvlab_toolbox'.
# Set the THESIS_ROOT environment variable to run on another machine
# (e.g. THESIS_ROOT=/home/member/github/BS-thesis); the default is the original path.
project_root = os.environ.get('THESIS_ROOT', '/home/atom/cvlab/thesis/')
os.chdir(project_root)
print(os.getcwd())

/home/atom/cvlab/thesis


In [2]:
# Reload imported modules automatically before each cell runs, so edits to the
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Dependencies
import glob, itertools
import matplotlib.pyplot as plt
import numpy as np

from tqdm import tqdm_notebook
from functools import partial
from torch.utils.data import Dataset, DataLoader
from sklearn.decomposition import PCA, KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from my_pkg import dataset
from my_pkg import utils

import sys
sys.path.append('cvlab_toolbox')
from cvt.models import SubspaceMethod, MutualSubspaceMethod, KernelMSM, ConstrainedMSM

# Shape subspaces 

1. Make shape subspaces for each frame.
    - Perform Gram-Schmidt on each frame matrix


2. Compute Grassmann kernel
    - Kernel size will be NF x NF


3. Concatenate column vectors of the same jump sequence
    - We will have N vectors of size NF^2

### Data Preparation

In [4]:
# NOTE(review): glob.glob returns paths in arbitrary order; if SLJDataset relies on
# the order of these lists, consider sorting them -- TODO confirm.
pose_paths = glob.glob('./data/marker/*.csv')
grf_paths = glob.glob('./data/GRF/*.csv')
info_path = './data/info.xlsx'

# Bind the instance to its own name. Rebinding `dataset` would shadow the imported
# `my_pkg.dataset` module, so running this cell a second time would look up
# `SLJDataset` on the instance instead of the module.
slj_dataset = dataset.SLJDataset(pose_paths, grf_paths, info_path)

N_LAST_FRAMES = 300  # number of trailing entries of 'trunc_pose' kept per sample


def transform(sample):
    """Truncate ``sample['trunc_pose']`` to its last ``N_LAST_FRAMES`` entries.

    The sample is modified in place and also returned. Sequences shorter than
    ``N_LAST_FRAMES`` are left at their original length by the slice.
    """
    sample['trunc_pose'] = sample['trunc_pose'][-N_LAST_FRAMES:]
    return sample


slj_dataset.transforms = [transform]
# presumably `stratify=0` stratifies the split on label column 0 -- TODO confirm
trainset, testset = slj_dataset.train_test_split(test_size=0.3, stratify=0)

--- Split stats ---
Number of train subjects: 100
Number of test subjects: 43

Subject Ratio: 2.3255813953488373
Sample Ratio: 2.337579617834395

Labels Ratio ---
Label ['miss', 'healthy', 'structural', 'subjective', 'recovered', 'prone']
Train [0.18256131 0.21253406 0.49318801 0.34877384 0.11171662 0.31880109]
Test [0.17834395 0.19745223 0.51592357 0.28025478 0.15923567 0.28025478]


In [5]:
# Inputs are the truncated pose sequences; targets are column 0 of the label array.
X_train = trainset['trunc_pose']
y_train = np.asarray(trainset['label'])[:, 0]
X_test = testset['trunc_pose']
y_test = np.asarray(testset['label'])[:, 0]

## Create shape subspaces

This approach (one kernel over every frame of every sequence) has an impractical computation time and memory footprint.

In [6]:
# NOTE: kept disabled -- this attempt ended in the MemoryError shown in the output.
# It allocates one (N*F) x (N*F) kernel over every frame of every training sequence.
# With the 367 training sequences and up to 300 frames kept by `transform`, that is
# about 110100**2 float64 entries (roughly 97 GB), plus (N*F)**2 loop iterations.
# F = len(X_train[0]) # number of frames
# N = len(X_train) # length of dataset
# K = np.zeros((N*F, N*F))

# reshaped_X_train = np.asarray(X_train).reshape(N*F, 29, 3)
# for i, j in tqdm_notebook(itertools.product(range(N*F), range(N*F)), total=(N*F)**2):
#     S1 , S2 = reshaped_X_train[i], reshaped_X_train[j]
#     S1 = utils.normalize_columns(utils.gram_schmidt(S1))
#     S2 = utils.normalize_columns(utils.gram_schmidt(S2))

#     K[i,j] = utils.grassman_kernel(S1, S2)

MemoryError: 

Instead, below we work one sequence at a time:
- Create shape subspaces for each frame
- Calculate the kernel matrix of each sequence
- Do 1-NN MSM

or

- Concatenate each kernel matrix

In [None]:
F = len(X_train[0])  # number of frames per sequence
N = len(X_train)  # number of training sequences

# For every training sequence, fill an F x F matrix with the Grassmann-kernel value
# of each pair of per-frame shape subspaces, then orthonormalise that matrix.
seq_subspaces_train = []
for x in tqdm_notebook(X_train):
    # Orthonormalise each frame once up front rather than inside the pair loop,
    # which would repeat the same Gram-Schmidt 2*F*F times per sequence.
    # Assumes the utils helpers are deterministic and do not depend on call order.
    frame_bases = [
        utils.normalize_columns(utils.gram_schmidt(x[i])) for i in range(F)
    ]

    K = np.zeros((F, F))
    for i, j in itertools.product(range(F), range(F)):
        K[i, j] = utils.grassman_kernel(frame_bases[i], frame_bases[j])

    seq_subspaces_train.append(utils.normalize_columns(utils.gram_schmidt(K)))

HBox(children=(IntProgress(value=0, max=367), HTML(value='')))

In [None]:
# Same construction as for the training sequences, applied to the test sequences.
# `F` comes from the cell that builds `seq_subspaces_train`; this assumes every test
# sequence has at least F frames -- TODO confirm.
seq_subspaces_test = []
for x in tqdm_notebook(X_test):
    # Orthonormalise each frame once up front rather than inside the pair loop,
    # which would repeat the same Gram-Schmidt 2*F*F times per sequence.
    # Assumes the utils helpers are deterministic and do not depend on call order.
    frame_bases = [
        utils.normalize_columns(utils.gram_schmidt(x[i])) for i in range(F)
    ]

    K = np.zeros((F, F))
    for i, j in itertools.product(range(F), range(F)):
        K[i, j] = utils.grassman_kernel(frame_bases[i], frame_bases[j])

    seq_subspaces_test.append(utils.normalize_columns(utils.gram_schmidt(K)))

In [None]:
# Sweep the MSM subspace dimension and record the score on the test split.
# NOTE(review): the hyper-parameter is picked on the test split, so the best score
# is an optimistic estimate -- consider a separate validation split.
subdim_grid = range(1, 30)
scores = []
for n_subdims in tqdm_notebook(subdim_grid):
    msm = MutualSubspaceMethod(n_subdims=n_subdims)
    msm.fit(seq_subspaces_train, y_train)
    scores.append(msm.score(seq_subspaces_test, y_test))

# Plot against the actual n_subdims values; plotting `scores` alone would put
# n_subdims=1 at x=0 and shift every point by one.
plt.plot(subdim_grid, scores)
plt.xlabel('n_subdims')
plt.ylabel('score')
plt.title(f'MSM {max(scores)}')
plt.show()

In [None]:
# Sweep the KernelMSM `sigma` at a fixed subspace dimension.
# n_subdims is set by hand: `scores` is reset just below, so
# `scores.index(max(scores))` cannot be evaluated here, and as a 0-based index it
# would be one less than the matching n_subdims of a sweep starting at 1.
n_subdims = 15
sigma_grid = np.logspace(-2, 2, 20)
scores = []
for sigma in tqdm_notebook(sigma_grid):
    msm = KernelMSM(n_subdims=n_subdims, sigma=sigma)
    msm.fit(seq_subspaces_train, y_train)
    scores.append(msm.score(seq_subspaces_test, y_test))

# Plot against the sigma values on a log axis (they are log-spaced); plotting
# `scores` alone would only show the sample index 0..19.
plt.plot(sigma_grid, scores)
plt.xscale('log')
plt.xlabel('sigma')
plt.ylabel('score')
plt.title(f'KernelMSM {max(scores)}')
plt.show()

In [None]:
# Sweep the ConstrainedMSM subspace dimension; n_gds_dims is tied to it and always
# set to twice n_subdims.
subdim_grid = range(1, 20)
scores = []
for n_subdims in tqdm_notebook(subdim_grid):
    cmsm = ConstrainedMSM(n_subdims=n_subdims, n_gds_dims=n_subdims*2)
    cmsm.fit(seq_subspaces_train, y_train)
    scores.append(cmsm.score(seq_subspaces_test, y_test))

# Plot against the actual n_subdims values; plotting `scores` alone would put
# n_subdims=1 at x=0 and shift every point by one.
plt.plot(subdim_grid, scores)
plt.xlabel('n_subdims')
plt.ylabel('score')
plt.title(f'ConstrainedMSM {max(scores)}')
plt.show()

In [None]:
# Constant-prediction baseline: the share of test labels equal to 0, i.e. the
# fraction that would be correct when predicting 0 for every test sample.
print(np.count_nonzero(y_test == 0) / len(y_test))