In [1]:
from utils import *
from dataset import *
from constants import *
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

from dataset import Participant

In [None]:
participant = Participant('s6', data_path=DATA_PATH_NOTEBOOK)

In [26]:
test_size = 0.3
nb_pca_components = 50

## Execution

In [3]:
movtype = 'E'
ex_features = participant.get_features_all_sessions_mvt(movtype)

Loading features for 1 sessions...


100%|██████████| 40/40 [00:02<00:00, 16.00it/s]
100%|██████████| 40/40 [00:01<00:00, 24.04it/s]
100%|██████████| 40/40 [00:01<00:00, 25.84it/s]
100%|██████████| 40/40 [00:02<00:00, 15.67it/s]
100%|██████████| 40/40 [00:02<00:00, 19.37it/s]
100%|██████████| 40/40 [00:01<00:00, 28.65it/s]]
100%|██████████| 40/40 [00:01<00:00, 20.46it/s]]
100%|██████████| 40/40 [00:01<00:00, 23.51it/s]]
100%|██████████| 40/40 [00:02<00:00, 15.46it/s]]
100%|██████████| 40/40 [00:02<00:00, 15.90it/s]]
100%|██████████| 40/40 [00:01<00:00, 26.86it/s]]
100%|██████████| 40/40 [00:01<00:00, 25.80it/s]]
100%|██████████| 40/40 [00:01<00:00, 24.42it/s]]
100%|██████████| 40/40 [00:01<00:00, 21.08it/s]]
100%|██████████| 40/40 [00:02<00:00, 14.41it/s]]
100%|██████████| 40/40 [00:01<00:00, 25.74it/s]]
100%|██████████| 40/40 [00:01<00:00, 23.19it/s]]
100%|██████████| 40/40 [00:01<00:00, 25.10it/s]]
100%|██████████| 40/40 [00:02<00:00, 16.96it/s]]
100%|██████████| 40/40 [00:01<00:00, 22.52it/s]]
100%|██████████| 40/40 [0

In [4]:
print(f'The dataset contains {ex_features.shape[0]} samples and {ex_features.shape[1]} features.')

The dataset contains 128 samples and 2881 features.


## Train a model (Logistic Regression as baseline)

In [19]:
X = ex_features.drop('label', axis=1)
y = ex_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression
parameters = {'penalty': ['l1', 'l2']}
model = LogisticRegression(solver='liblinear')
clf = GridSearchCV(model, parameters)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'penalty': 'l2'}
Accuracy: 0.62


#### With PCA

In [None]:
X = ex_features.drop('label', axis=1)
y = ex_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pca = PCA(n_components=nb_pca_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train logistic regression
parameters = {'penalty': ['l1', 'l2']}
model = LogisticRegression(solver='liblinear')
clf = GridSearchCV(model, parameters)
clf.fit(X_train_pca, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'penalty': 'l1'}
Accuracy: 0.56


## Train a model (SVM)

In [22]:
X = ex_features.drop('label', axis=1)
y = ex_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVM
parameters = {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'rbf', 'sigmoid']}
svm = SVC()
clf = GridSearchCV(svm, parameters)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'C': 10, 'kernel': 'sigmoid'}
Accuracy: 0.64


#### With PCA

In [None]:
X = ex_features.drop('label', axis=1)
y = ex_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pca = PCA(n_components=nb_pca_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train SVM
parameters = {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'rbf', 'sigmoid']}
svm = SVC()
clf = GridSearchCV(svm, parameters)
clf.fit(X_train_pca, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'C': 10, 'kernel': 'sigmoid'}
Accuracy: 0.56


## Train a model (Random Forest)

In [25]:
X = ex_features.drop('label', axis=1)
y = ex_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest
n_estimators = [10, 50, 90, 130]
max_depth = [10, 25, 50]
param_grid = {'n_estimators': n_estimators, 'max_depth': max_depth}

rf = RandomForestClassifier() 
clf = GridSearchCV(rf, param_grid)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test Random Forest
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


{'max_depth': 25, 'n_estimators': 90}
Accuracy: 0.59


## Observation

In [10]:
movtype = 'O'
obs_features = participant.get_features_all_sessions_mvt(movtype)

Loading features for 1 sessions...


100%|██████████| 40/40 [00:01<00:00, 21.02it/s]
100%|██████████| 40/40 [00:01<00:00, 24.17it/s]
100%|██████████| 40/40 [00:02<00:00, 16.17it/s]
100%|██████████| 40/40 [00:02<00:00, 14.71it/s]
100%|██████████| 40/40 [00:02<00:00, 17.29it/s]
100%|██████████| 40/40 [00:01<00:00, 24.78it/s]
100%|██████████| 40/40 [00:01<00:00, 27.08it/s]]
100%|██████████| 40/40 [00:01<00:00, 28.18it/s]]
100%|██████████| 40/40 [00:02<00:00, 17.84it/s]]
100%|██████████| 40/40 [00:01<00:00, 23.21it/s]]
100%|██████████| 40/40 [00:02<00:00, 17.55it/s]]
100%|██████████| 40/40 [00:01<00:00, 25.54it/s]]
100%|██████████| 40/40 [00:01<00:00, 23.37it/s]]
100%|██████████| 40/40 [00:01<00:00, 28.72it/s]]
100%|██████████| 40/40 [00:02<00:00, 15.27it/s]]
100%|██████████| 40/40 [00:01<00:00, 22.03it/s]]
100%|██████████| 40/40 [00:02<00:00, 18.18it/s]]
100%|██████████| 40/40 [00:01<00:00, 22.01it/s]]
100%|██████████| 40/40 [00:01<00:00, 22.23it/s]]
100%|██████████| 40/40 [00:01<00:00, 26.04it/s]]
100%|██████████| 40/40 [00

In [11]:
print(f'The dataset contains {obs_features.shape[0]} samples and {obs_features.shape[1]} features.')

The dataset contains 128 samples and 2881 features.


## Train a model (Logistic Regression as baseline)

In [27]:
X = obs_features.drop('label', axis=1)
y = obs_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression
parameters = {'penalty': ['l1', 'l2']}
model = LogisticRegression(solver='liblinear')
clf = GridSearchCV(model, parameters)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'penalty': 'l2'}
Accuracy: 0.62


#### With PCA

In [28]:
X = obs_features.drop('label', axis=1)
y = obs_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pca = PCA(n_components=nb_pca_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train logistic regression
parameters = {'penalty': ['l1', 'l2']}
model = LogisticRegression(solver='liblinear')
clf = GridSearchCV(model, parameters)
clf.fit(X_train_pca, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'penalty': 'l2'}
Accuracy: 0.72


## Train a model (SVM)

In [29]:
X = obs_features.drop('label', axis=1)
y = obs_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVM
parameters = {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'rbf', 'sigmoid']}
svm = SVC()
clf = GridSearchCV(svm, parameters)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'C': 1, 'kernel': 'sigmoid'}
Accuracy: 0.62


#### With PCA

In [30]:
X = obs_features.drop('label', axis=1)
y = obs_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

pca = PCA(n_components=nb_pca_components)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Train SVM
parameters = {'C': [0.1, 1, 10, 100, 1000], 'kernel': ['linear', 'rbf', 'sigmoid']}
svm = SVC()
clf = GridSearchCV(svm, parameters)
clf.fit(X_train_pca, y_train)
print(clf.best_params_)

# Test SVM
y_pred = clf.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

{'C': 1, 'kernel': 'sigmoid'}
Accuracy: 0.67


## Train a model (Random Forest)

In [31]:
X = obs_features.drop('label', axis=1)
y = obs_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest
n_estimators = [10, 50, 90, 130]
max_depth = [10, 25, 50]
param_grid = {'n_estimators': n_estimators, 'max_depth': max_depth}

rf = RandomForestClassifier() 
clf = GridSearchCV(rf, param_grid)
clf.fit(X_train, y_train)
print(clf.best_params_)

# Test Random Forest
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")


{'max_depth': 25, 'n_estimators': 130}
Accuracy: 0.82
