<center> 
    <h1>Mini-project 1</h1>
    <h2>Predict the laterality of upcoming finger movements</h2>
</center>

#### Imports

In [None]:
import numpy as np
import matplotlib.pylab as plt
import dlc_bci as bci
from types import SimpleNamespace 

from sklearn.model_selection import cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# baselines
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

import torch 
from torch.nn import Conv2d, Linear, functional, Module, CrossEntropyLoss
from torch import Tensor
from torch.autograd import Variable
from torch.optim import Adam
import torch.nn.functional as F

from helpers import *
from modelWrapper import *

## Download/load the dataset

- load the dataset

In [None]:
# Load the training split and report the type and size of each returned object.
X_tr, y_tr = bci.load(root='./data_bci')
for _loaded in (X_tr, y_tr):
    print(str(type(_loaded)), _loaded.size())

# Same report for the held-out split (train=False).
X_te, y_te = bci.load(root='./data_bci', train=False)
for _loaded in (X_te, y_te):
    print(str(type(_loaded)), _loaded.size())

In [None]:
# Display one training sample as an image to sanity-check the loaded data.
i = 45  # index of the training sample to display
fig, ax = plt.subplots()
ax.imshow(X_tr[i])
# A title makes the figure self-explanatory; the trailing ';' hides the repr output.
ax.set_title("Training sample {}".format(i));

- Organize the dataset into `train` and `test` containers, keeping each X both as a torch `Variable` (with an added channel dimension) and as a flattened NumPy array.

In [None]:
def _make_split(X, y):
    """Bundle one data split in the two formats used by the notebook.

    Args:
        X: input tensor of the split; a dimension is inserted at index 1 for
            the ``variable`` view, and everything after the first dimension is
            flattened for the ``numpy`` view.
        y: target tensor of the split.

    Returns:
        SimpleNamespace with ``X`` and ``y`` members, each exposing
        ``.variable`` (torch ``Variable``) and ``.numpy`` (NumPy array).
    """
    return SimpleNamespace(
        X=SimpleNamespace(
            variable=Variable(X.unsqueeze(1)),
            numpy=X.numpy().reshape(X.shape[0], -1),
        ),
        y=SimpleNamespace(
            variable=Variable(y),
            numpy=y.numpy(),
        ),
    )


# Build both splits through the same helper so they are guaranteed to share a layout.
train = _make_split(X_tr, y_tr)
test = _make_split(X_te, y_te)

## Baselines:

### Logistic regression

In [None]:
# Grid of candidate values, log-spaced from 1e-6 to 1e6.
# Note: each value is passed as LogisticRegression's `C`, which scikit-learn
# defines as the *inverse* of the regularization strength.
lambdas = np.logspace(-6, 6, 20)

# Mean train / validation accuracy for every candidate, in grid order.
logreg = dict(tr_scores=[], va_scores=[])

for lambda_ in lambdas:
    # 10-fold cross-validation on the flattened training data.
    result = cross_validate(
        LogisticRegression(C=lambda_),
        train.X.numpy,
        train.y.numpy,
        cv=10,
        return_train_score=True,
    )
    logreg["tr_scores"].append(result["train_score"].mean())
    logreg["va_scores"].append(result["test_score"].mean())

plot_scores(lambdas, "lambda", logreg["tr_scores"], logreg["va_scores"], log_scale=True)

# Keep the candidate with the highest mean validation score, refit it on the
# full training set and report its accuracy on the test set.
best_lambda = lambdas[np.argmax(logreg["va_scores"])]
print('Best lambda:', best_lambda)
print(
    'Test score:',
    LogisticRegression(C=best_lambda)
    .fit(train.X.numpy, train.y.numpy)
    .score(test.X.numpy, test.y.numpy),
)

### Random Forest

In [None]:
# Candidate maximum tree depths: 5, 15, ..., 95.
depths = np.arange(5, 100, 10) # grid search on a parameter of the model

# here we store all the scores obtained with the different depths
randForest = {
    "tr_scores": [],
    "va_scores": []
}

for depth in depths:
    # 10-fold cross-validation; random_state is fixed so runs are repeatable.
    result = cross_validate(
        RandomForestClassifier(n_estimators=100, max_depth=depth, n_jobs=-1, random_state=1), 
        train.X.numpy, train.y.numpy, cv=10, return_train_score=True)
    
    randForest["tr_scores"].append(np.mean(result["train_score"]))
    randForest["va_scores"].append(np.mean(result["test_score"]))
    
plot_scores(depths, "depth", randForest["tr_scores"], randForest["va_scores"], log_scale=False)

best_depth = depths[np.argmax(randForest["va_scores"])]
print('Best depth:', best_depth)
# Refit with the *selected* depth. Using the loop variable `depth` here would
# silently evaluate the last grid value (95) instead of the best one.
print('Test score:',
      RandomForestClassifier(n_estimators=100, max_depth=best_depth, n_jobs=-1, random_state=1)
      .fit(train.X.numpy, train.y.numpy)
      .score(test.X.numpy, test.y.numpy))

### K-Nearest Neighbors

In [None]:
# Learn the per-feature scaling on the training data only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(train.X.numpy)
X_tr_scaled = scaler.transform(train.X.numpy)
X_te_scaled = scaler.transform(test.X.numpy)

# Sanity check on the scaled training features: per-feature std and mean, averaged.
print("standard deviation average:", X_tr_scaled.std(axis=0).mean())
print("mean average:", X_tr_scaled.mean(axis=0).mean())

In [None]:
# Reduce dimensionality with PCA, keeping enough components to explain 95% of
# the variance of the standardised training data.
pca = PCA(n_components=0.95)

# Recompute the standardised inputs from `scaler` instead of reading
# X_tr_scaled / X_te_scaled: this cell overwrites those names with the PCA
# projections, so reading them back would make a re-run fit PCA on
# already-projected data. Written this way the cell is idempotent.
pca.fit(scaler.transform(train.X.numpy))
X_tr_scaled = pca.transform(scaler.transform(train.X.numpy))
X_te_scaled = pca.transform(scaler.transform(test.X.numpy))
X_tr_scaled.shape, X_te_scaled.shape

In [None]:
# Candidate neighbour counts: 1..9.
Ks = np.arange(1, 10, 1) # grid search on a parameter of the model

# here we store all the scores obtained with the different number of neighbors
nearestNeig = {
    "tr_scores": [],
    "va_scores": []
}

for k in Ks:
    # 10-fold cross-validation on the scaled + PCA-projected training features.
    result = cross_validate(
        KNeighborsClassifier(n_neighbors=k), 
        X_tr_scaled, train.y.numpy, cv=10, return_train_score=True)
    
    nearestNeig["tr_scores"].append(np.mean(result["train_score"]))
    nearestNeig["va_scores"].append(np.mean(result["test_score"]))
    
plot_scores(Ks, "# Neighbors", nearestNeig["tr_scores"], nearestNeig["va_scores"], log_scale=False)

best_k = Ks[np.argmax(nearestNeig["va_scores"])]
print('Best k:', best_k)
# Refit with the *selected* k. Using the loop variable `k` here would silently
# evaluate the last grid value (9) instead of the best one.
print('Test score:', 
      KNeighborsClassifier(n_neighbors=best_k)
      .fit(X_tr_scaled, train.y.numpy)
      .score(X_te_scaled, test.y.numpy))

## Convolutional networks

In [None]:
# define the network with three convolutional layers
class CNN(Module, modelWrapper):
    """Convolutional classifier: three conv/pool stages followed by two linear layers.

    The shape comments in ``forward`` assume single-channel inputs of size
    (1, 28, 50). With that input the third pooling stage leaves
    32 * 4 * 3 = 384 features per sample, which is the input width of ``fc1``.

    Args:
        nb_hidden: number of units in the hidden fully connected layer.
    """

    def __init__(self, nb_hidden):
        # Initialise Module's bookkeeping before attaching anything to self.
        super(CNN, self).__init__()
        # Remembered so reset() can rebuild an identical architecture.
        self.nb_hidden = nb_hidden

        # The first kernel is wider along the last axis; every padding is chosen
        # so that each convolution preserves the spatial size.
        self.conv1 = Conv2d(1, 32, kernel_size=(3, 7), padding=(1, 3))
        self.conv2 = Conv2d(32, 64, kernel_size=5, padding=2)
        self.conv3 = Conv2d(64, 32, kernel_size=5, padding=2)

        # 384 = 32 channels * 4 * 3 positions left after the last pooling stage.
        self.fc1 = Linear(384, nb_hidden)
        self.fc2 = Linear(nb_hidden, 2)

        # Loss and optimizer live on the model itself
        # (presumably consumed by modelWrapper's training code -- TODO confirm).
        self.criterion = CrossEntropyLoss()
        self.optimizer = Adam(self.parameters(), lr=0.01)

    def forward(self, x):
        """Return the raw two-class outputs of ``fc2`` for a batch ``x``."""
        # (1, 28, 50)
        x = F.leaky_relu(F.max_pool2d(self.conv1(x), 2))
        # (32, 14, 25)
        x = F.leaky_relu(F.max_pool2d(self.conv2(x), (2, 5)))
        # (64, 7, 5)
        x = F.leaky_relu(F.max_pool2d(self.conv3(x), 2, padding=(1, 1)))
        # (32, 4, 3)

        # Flatten per sample while keeping the batch dimension fixed: an input of
        # unexpected size then fails loudly in fc1 instead of being silently
        # reshaped into a different number of rows.
        x = F.leaky_relu(self.fc1(x.view(x.size(0), -1)))
        # (1, nb_hidden)
        x = self.fc2(x)
        return x

    def reset(self):
        """Re-run ``__init__`` with the stored ``nb_hidden``, recreating layers and optimizer."""
        self.__init__(self.nb_hidden)

In [None]:
# Seed torch's RNG so the initial weights are the same on every Restart & Run All.
# NOTE(review): whether fit() draws from other random sources is not visible here.
torch.manual_seed(0)
model = CNN(20)  # 20 units in the hidden fully connected layer

In [None]:
# Train on the training split for 15 epochs; the test split is also handed to fit().
model.fit(
    train.X.variable,
    train.y.variable,
    X_test=test.X.variable,
    y_test=test.y.variable,
    epochs=15,
)

In [None]:
# model.predict(train.X.variable).shape, train.X.variable.shape
# model.score(train.X.variable, train.y.variable)