In [1]:
from utils import *
from dataset import *
from constants import *
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

from dataset import Participant

#### Select participant and session (session only for plot purposes)

In [2]:
s6 = Participant('s6', data_path='../Data/Dataset_4subjects_Exe_Obs')

In [3]:
print(f'Number of sessions: {len(s6.sessions)}')
print(f'Number of channels: {len(s6.channels)}')
print(f'Number of relevant channels: {len(s6.relevant_channels)}')
print(f'The relevant channels are located in the following locations:')
print([s6.channels_locations[i] for i in [channel.idx for channel in s6.relevant_channels]])

Number of sessions: 1
Number of channels: 106
Number of relevant channels: 12
The relevant channels are located in the following locations:
['paracentral', 'WM_precentral', 'caudalmiddlefrontal', 'caudalmiddlefrontal', 'supramarginal', 'supramarginal', 'postcentral', 'WM_insula', 'precentral', 'precentral', 'paracentral', 'precentral']


#### Get participant's features

In [3]:
features = s6.get_features_all_sessions()

Compute PSD for each baselines and activities, then mean it

In [4]:
print(f'The dataset contains {features.shape[0]} samples and {features.shape[1]} features.')

The dataset contains 256 samples and 20305 features.


## Train a model (SVM)

#### Start without any dimensionality reduction

In [7]:
X = features.drop('label', axis=1)
y = features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVM
parameters = {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'rbf', 'sigmoid']}
svm = SVC()
clf = GridSearchCV(svm, parameters)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'C': 1, 'kernel': 'rbf'}
Accuracy: 1.00


#### With PCA

In [8]:
from sklearn.decomposition import PCA

X = features.drop('label', axis=1)
y = features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pca = PCA(n_components=100)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train SVM
parameters = {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'rbf', 'sigmoid']}
svm = SVC()
clf = GridSearchCV(svm, parameters)
clf.fit(X_train_pca, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'C': 1, 'kernel': 'rbf'}
Accuracy: 0.98


## Train a model (Random Forest)

In [9]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest
n_estimators = [10, 50, 90, 130]
max_depth = [10, 25, 50]
param_grid = {'n_estimators': n_estimators, 'max_depth': max_depth}

rf = RandomForestClassifier() 
clf = GridSearchCV(rf, param_grid)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test Random Forest
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


{'max_depth': 10, 'n_estimators': 10}
Accuracy: 0.98
