In [1]:
from utils import *
from dataset import *
from constants import *
from models.BaseModels import *
from models.DeepModels import *
from models.DeepUtils import *

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from dataset import Participant
from torch.utils.data import DataLoader

# Seed the random number generators used by the notebook so that a fresh
# "Restart & Run All" gives repeatable results.
random.seed(RANDOM_STATE)
# The MLP section further down relies on torch (e.g. a DataLoader with
# shuffle=True), which has its own generator that random.seed() does not touch.
torch.manual_seed(RANDOM_STATE)

In [7]:
# Cache switch read by the cells below: when True they load previously saved
# objects from 'saved/'; when False they recompute them and overwrite the files.
use_saved = False

In [8]:
# Obtain the Participant object for subject 's6'.
if not use_saved:
    # Build the participant from DATA_PATH_NOTEBOOK and cache it on disk so a
    # later run with use_saved=True can skip this step.
    participant = Participant('s6', data_path=DATA_PATH_NOTEBOOK, alpha=0.05)
    with open('saved/s6.pkl', 'wb') as pkl_file:
        pickle.dump(participant, pkl_file, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # Reuse the cached pickle written by a previous run.
    participant = Participant.load_from_pickle('saved/s6.pkl')

In [9]:
# Fraction of the samples held out by train_test_split for evaluation.
test_size = 0.3
# Explained-variance setting intended for the PCA variants of the models
# (presumably the fraction of variance PCA must retain — confirm in models.BaseModels).
pca_expl_var = 0.95

## Execution

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [5]:
# Feature table for the 'E' movement type: load the cached HDF5 file, or
# compute it from the participant and refresh the cache.
if use_saved:
    ex_features = pd.read_hdf('saved/ex_features.h5', key='df')
else:
    ex_features = participant.get_features_all_sessions_mvt('E')
    # `key` is passed by keyword: pandas deprecated positional arguments to
    # to_hdf (other than the path) and newer releases reject them.
    ex_features.to_hdf('saved/ex_features.h5', key='df', mode='w', data_columns=True)

In [6]:
# Summarise the feature table and where the relevant channels are located.
print(f'The dataset contains {ex_features.shape[0]} samples and {ex_features.shape[1]} features.')
print(f'The {len(participant.relevant_channels_ex)} relevant channels are located in the following locations:')

# Location label of every relevant channel, looked up through the channel's idx.
regions = [participant.channels_locations[channel.idx] for channel in participant.relevant_channels_ex]

# Number of relevant channels found in each distinct location.
channels_per_regions = {region: regions.count(region) for region in set(regions)}
print(channels_per_regions)

The dataset contains 128 samples and 4897 features.
The 68 relevant channels are located in the following locations:
{'caudalmiddlefrontal': 4, 'WM_paracentral': 1, 'insula': 3, 'precentral': 26, 'WM_insula': 1, 'WM_precentral': 6, 'supramarginal': 3, 'paracentral': 3, 'superiorfrontal': 5, 'postcentral': 16}


Let's create a baseline by taking the same number of channels, but without checking whether they are responsive:

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [7]:
# Baseline feature table: same number of channels as the relevant set, obtained
# through get_features_all_sessions_rnd (presumably a random channel pick —
# confirm in dataset.Participant). Cached in HDF5 like the main features.
if use_saved:
    ex_baseline_features = pd.read_hdf('saved/ex_baseline_features.h5', key='df')
else:
    ex_baseline_features = participant.get_features_all_sessions_rnd(len(participant.relevant_channels_ex), movtype='E')
    # `key` is passed by keyword: pandas deprecated positional arguments to
    # to_hdf (other than the path) and newer releases reject them.
    ex_baseline_features.to_hdf('saved/ex_baseline_features.h5', key='df', mode='w', data_columns=True)

In [8]:
# Report the size of the baseline table. The column count printed as "features"
# still includes the 'label' column, which is only dropped in the training cell.
print(f'The baseline dataset contains {ex_baseline_features.shape[0]} samples and {ex_baseline_features.shape[1]} features.')

The baseline dataset contains 128 samples and 4897 features.


### Train a model on the baseline features (Logistic Regression)

In [9]:
# Separate the target column from the baseline feature columns.
y = ex_baseline_features['label']
X = ex_baseline_features.drop(columns='label')

# Hold out `test_size` of the samples for evaluation (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=RANDOM_STATE
)

# Fit the baseline classifier and score it on the held-out samples.
logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)

y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


## Let's now do the analysis for the responsive channels

In [6]:
# Separate the target column from the responsive-channel feature columns.
y = ex_features['label']
X = ex_features.drop(columns='label')

# Same hold-out fraction and seed as the baseline experiment.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=RANDOM_STATE
)

### Train a model (Logistic Regression)

In [11]:
# Logistic regression on the current train/test split.
logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)

# Score the held-out samples.
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### With PCA

In [12]:
# Logistic regression with PCA enabled. The explained-variance setting is read
# from `pca_expl_var` (configuration cell) rather than repeated as a literal,
# so changing it in one place affects every PCA variant.
logreg = LogisticRegressionModel(use_pca=True, expl_var=pca_expl_var)
logreg.fit(X_train, y_train)

# Score the held-out samples.
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


### Train a model (SVM)

In [13]:
# Support-vector classifier on the current train/test split.
svm = SVMModel()
svm.fit(X_train, y_train)

# Score the held-out samples.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.62


#### With PCA

In [14]:
# SVM with PCA enabled. The explained-variance setting is read from
# `pca_expl_var` (configuration cell) rather than repeated as a literal, so
# changing it in one place affects every PCA variant.
svm = SVMModel(use_pca=True, expl_var=pca_expl_var)
svm.fit(X_train, y_train)

# Score the held-out samples.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


### Train a model (Random Forest)

In [15]:
# Random forest on the current train/test split. The model gets its own name
# instead of reusing `svm`, which previously left a RandomForestModel bound to
# a variable that reads as an SVM.
random_forest = RandomForestModel()
random_forest.fit(X_train, y_train)

# Score the held-out samples.
y_pred = random_forest.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.62


### Train a model (Multi-Layer Perceptron)

In [7]:
# Build the train / validation / test sets for the MLP.
#
# The split is rebuilt from X and y on every run, so this cell is idempotent.
# Splitting the existing X_test in place would halve the test set again (and
# re-scale already-scaled data) each time the cell is re-executed. With the
# same seed, the first run yields exactly the split the previous cells used.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=test_size, random_state=RANDOM_STATE
)
# Split the hold-out portion evenly into validation and test sets.
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=RANDOM_STATE
)

# Fit the scaler on the training data only, then apply it to the other sets so
# no validation/test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [8]:
# Train on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# MLP with two hidden layers of 16 units; the first two arguments are
# presumably the input width and the number of output classes — confirm
# against models.DeepModels.MLP.
mlp = MLP(X_train.shape[1], 2, layers=(16, 16))
trainset = DfDataset(X_train, y_train)
valset = DfDataset(X_val, y_val)

# num_workers=0 loads batches in the main process. The data is a small
# in-memory array, so worker processes add start-up cost every epoch without
# speeding anything up.
train_loader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=0)
val_loader = DataLoader(valset, batch_size=4, shuffle=False, num_workers=0)

# NOTE(review): the positional arguments 0.1, 20, 4 are presumably learning
# rate, number of epochs and a third hyper-parameter — confirm against the
# Trainer signature in models.DeepUtils.
trainer = Trainer(mlp, 0.1, 20, 4, save_path='saved/mlp.pth', device=device)
trainer.train(train_loader, val_loader)

Epoch 1/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  3.92it/s]


Epoch: 1 	Training Loss: 37.176862 	Training Acc: 0.483146


Epoch 1/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.12s/it]


Epoch: 1 	Validation Loss: 14.961264 	Validation Acc: 0.578947
Validation loss decreased (inf --> 14.961264). Saving model ...


Epoch 2/20 - Training: 100%|██████████| 23/23 [00:06<00:00,  3.80it/s]


Epoch: 2 	Training Loss: 12.439016 	Training Acc: 0.573034


Epoch 2/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.15s/it]


Epoch: 2 	Validation Loss: 0.583965 	Validation Acc: 0.526316
Validation loss decreased (14.961264 --> 0.583965). Saving model ...


Epoch 3/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.03it/s]


Epoch: 3 	Training Loss: 0.737775 	Training Acc: 0.528090


Epoch 3/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.14s/it]


Epoch: 3 	Validation Loss: 0.669592 	Validation Acc: 0.473684


Epoch 4/20 - Training: 100%|██████████| 23/23 [00:06<00:00,  3.75it/s]


Epoch: 4 	Training Loss: 0.688911 	Training Acc: 0.449438


Epoch 4/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.08s/it]


Epoch: 4 	Validation Loss: 0.651874 	Validation Acc: 0.578947


Epoch 5/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.07it/s]


Epoch: 5 	Training Loss: 0.672107 	Training Acc: 0.539326


Epoch 5/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.14s/it]


Epoch: 5 	Validation Loss: 0.678135 	Validation Acc: 0.473684


Epoch 6/20 - Training: 100%|██████████| 23/23 [00:06<00:00,  3.52it/s]


Epoch: 6 	Training Loss: 0.695630 	Training Acc: 0.528090


Epoch 6/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.07s/it]


Epoch: 6 	Validation Loss: 0.659945 	Validation Acc: 0.473684


Epoch 7/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.40it/s]


Epoch: 7 	Training Loss: 0.702924 	Training Acc: 0.483146


Epoch 7/20 - Validation: 100%|██████████| 5/5 [00:06<00:00,  1.26s/it]


Epoch: 7 	Validation Loss: 0.652447 	Validation Acc: 0.578947


Epoch 8/20 - Training: 100%|██████████| 23/23 [00:06<00:00,  3.81it/s]


Epoch: 8 	Training Loss: 0.691020 	Training Acc: 0.528090


Epoch 8/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.01s/it]


Epoch: 8 	Validation Loss: 0.663220 	Validation Acc: 0.473684


Epoch 9/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.30it/s]


Epoch: 9 	Training Loss: 0.678531 	Training Acc: 0.471910


Epoch 9/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.09s/it]


Epoch: 9 	Validation Loss: 0.656105 	Validation Acc: 0.578947


Epoch 10/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.11it/s]


Epoch: 10 	Training Loss: 0.694310 	Training Acc: 0.528090


Epoch 10/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.05s/it]


Epoch: 10 	Validation Loss: 0.656134 	Validation Acc: 0.578947


Epoch 11/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.23it/s]


Epoch: 11 	Training Loss: 0.693133 	Training Acc: 0.516854


Epoch 11/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.07s/it]


Epoch: 11 	Validation Loss: 0.667356 	Validation Acc: 0.473684


Epoch 12/20 - Training: 100%|██████████| 23/23 [00:06<00:00,  3.78it/s]


Epoch: 12 	Training Loss: 0.711655 	Training Acc: 0.528090


Epoch 12/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.10s/it]


Epoch: 12 	Validation Loss: 0.660941 	Validation Acc: 0.473684


Epoch 13/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.26it/s]


Epoch: 13 	Training Loss: 0.713227 	Training Acc: 0.483146


Epoch 13/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.05s/it]


Epoch: 13 	Validation Loss: 0.658134 	Validation Acc: 0.473684


Epoch 14/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.10it/s]


Epoch: 14 	Training Loss: 0.694355 	Training Acc: 0.528090


Epoch 14/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.07s/it]


Epoch: 14 	Validation Loss: 0.657592 	Validation Acc: 0.473684


Epoch 15/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.07it/s]


Epoch: 15 	Training Loss: 0.685897 	Training Acc: 0.528090


Epoch 15/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.06s/it]


Epoch: 15 	Validation Loss: 0.661933 	Validation Acc: 0.473684


Epoch 16/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.11it/s]


Epoch: 16 	Training Loss: 0.677297 	Training Acc: 0.528090


Epoch 16/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.17s/it]


Epoch: 16 	Validation Loss: 0.651362 	Validation Acc: 0.578947


Epoch 17/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  3.91it/s]


Epoch: 17 	Training Loss: 0.686944 	Training Acc: 0.494382


Epoch 17/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.16s/it]


Epoch: 17 	Validation Loss: 0.661322 	Validation Acc: 0.473684


Epoch 18/20 - Training: 100%|██████████| 23/23 [00:05<00:00,  4.05it/s]


Epoch: 18 	Training Loss: 0.689788 	Training Acc: 0.415730


Epoch 18/20 - Validation: 100%|██████████| 5/5 [00:06<00:00,  1.24s/it]


Epoch: 18 	Validation Loss: 0.672370 	Validation Acc: 0.473684


Epoch 19/20 - Training: 100%|██████████| 23/23 [00:06<00:00,  3.39it/s]


Epoch: 19 	Training Loss: 0.676569 	Training Acc: 0.528090


Epoch 19/20 - Validation: 100%|██████████| 5/5 [00:06<00:00,  1.27s/it]


Epoch: 19 	Validation Loss: 0.658050 	Validation Acc: 0.473684


Epoch 20/20 - Training: 100%|██████████| 23/23 [00:06<00:00,  3.31it/s]


Epoch: 20 	Training Loss: 0.679529 	Training Acc: 0.528090


Epoch 20/20 - Validation: 100%|██████████| 5/5 [00:05<00:00,  1.07s/it]

Epoch: 20 	Validation Loss: 0.655745 	Validation Acc: 0.578947





In [17]:
# Evaluate the trained MLP on the held-out test samples, one sample at a time.
testset = DfDataset(X_test, y_test)

# Inference mode: eval() disables training-only layer behaviour (dropout,
# batch-norm updates) and no_grad() avoids building autograd graphs.
trainer.model.eval()
# Send each sample to wherever the model's weights live, so the loop also works
# when training ran on the GPU.
model_device = next(trainer.model.parameters()).device

acc = 0
with torch.no_grad():
    # `features` rather than `input`: the latter would rebind the builtin
    # input() for the rest of the notebook session.
    for features, label in testset:
        pred = trainer.model(features.to(model_device))
        # Compare plain Python ints so the check does not depend on which
        # device or container type the label comes in.
        if torch.argmax(pred).item() == int(label):
            acc += 1

acc /= len(testset)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.55


## Observation

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [None]:
# Feature table for the 'O' movement type: load the cached HDF5 file, or
# compute it from the participant and refresh the cache.
if use_saved:
    obs_features = pd.read_hdf('saved/obs_features.h5', key='df')
else:
    obs_features = participant.get_features_all_sessions_mvt('O')
    # `key` is passed by keyword: pandas deprecated positional arguments to
    # to_hdf (other than the path) and newer releases reject them.
    obs_features.to_hdf('saved/obs_features.h5', key='df', mode='w', data_columns=True)

100%|██████████| 51/51 [00:03<00:00, 14.79it/s]
100%|██████████| 51/51 [00:02<00:00, 19.41it/s]
100%|██████████| 51/51 [00:04<00:00, 12.15it/s]
100%|██████████| 51/51 [00:03<00:00, 16.93it/s]
100%|██████████| 51/51 [00:03<00:00, 16.81it/s]
100%|██████████| 51/51 [00:02<00:00, 20.05it/s]
100%|██████████| 51/51 [00:02<00:00, 23.71it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.41it/s]]
100%|██████████| 51/51 [00:03<00:00, 13.51it/s]]
100%|██████████| 51/51 [00:02<00:00, 17.20it/s]]
100%|██████████| 51/51 [00:03<00:00, 14.76it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.37it/s]]
100%|██████████| 51/51 [00:02<00:00, 23.64it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.34it/s]]
100%|██████████| 51/51 [00:03<00:00, 13.45it/s]]
100%|██████████| 51/51 [00:02<00:00, 20.16it/s]]
100%|██████████| 51/51 [00:02<00:00, 18.51it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.31it/s]]
100%|██████████| 51/51 [00:02<00:00, 17.97it/s]]
100%|██████████| 51/51 [00:02<00:00, 20.70it/s]]
100%|██████████| 51/51 [00

In [None]:
# Report the size of the observation table. The column count printed as
# "features" still includes the 'label' column, which is dropped before training.
print(f'The dataset contains {obs_features.shape[0]} samples and {obs_features.shape[1]} features.')

The dataset contains 128 samples and 2881 features.


## Let's now do the analysis for the responsive channels

In [None]:
# Separate the target column from the observation feature columns.
y = obs_features['label']
X = obs_features.drop(columns='label')

# Same hold-out fraction and seed as the execution experiments.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=RANDOM_STATE
)

### Train a model (Logistic Regression)

In [None]:
# Logistic regression on the observation train/test split.
logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)

# Score the held-out samples.
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### With PCA

In [None]:
# Logistic regression with PCA enabled on the observation data. The
# explained-variance setting is read from `pca_expl_var` (configuration cell)
# rather than repeated as a literal.
logreg = LogisticRegressionModel(use_pca=True, expl_var=pca_expl_var)
logreg.fit(X_train, y_train)

# Score the held-out samples.
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


### Train a model (SVM)

In [None]:
# Support-vector classifier on the observation train/test split.
svm = SVMModel()
svm.fit(X_train, y_train)

# Score the held-out samples.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.62


#### With PCA

In [None]:
# SVM with PCA enabled on the observation data. The explained-variance setting
# is read from `pca_expl_var` (configuration cell) rather than repeated as a
# literal.
svm = SVMModel(use_pca=True, expl_var=pca_expl_var)
svm.fit(X_train, y_train)

# Score the held-out samples.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


### Train a model (Random Forest)

In [None]:
# Random forest on the observation train/test split. The model gets its own
# name instead of reusing `svm`, which previously left a RandomForestModel
# bound to a variable that reads as an SVM.
random_forest = RandomForestModel()
random_forest.fit(X_train, y_train)

# Score the held-out samples.
y_pred = random_forest.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.54
