In [1]:
from utils import *
from dataset import *
from constants import *
from models.BaseModels import *
from models.DeepModels import *
from models.DeepUtils import *

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from dataset import Participant
from torch.utils.data import DataLoader

random.seed(RANDOM_STATE)

In [2]:
part_name = 's6'
use_saved = True

In [3]:
if use_saved and os.path.exists(f'saved/{part_name}.pkl'):
    participant = Participant.load_from_pickle(f'saved/{part_name}.pkl')
else:
    participant = Participant(part_name, data_path=DATA_PATH_NOTEBOOK, alpha=ALPHA)
    saved_dir = os.path.join(os.getcwd(), 'saved')
    if not os.path.exists(saved_dir):
        os.makedirs(saved_dir)
    with open(f'saved/{part_name}.pkl', 'wb') as f:
        pickle.dump(participant, f, pickle.HIGHEST_PROTOCOL)

In [4]:
test_size = 0.3
pca_expl_var = 0.95

## Execution

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [5]:
if use_saved and os.path.exists(f'saved/ex_features_{part_name}_mvt.h5'):
    ex_features = pd.read_hdf(f'saved/ex_features_{part_name}_mvt.h5', 'df')
else:
    ex_features = participant.get_features_all_sessions_mvt('E')
    ex_features.to_hdf(f'saved/ex_features_{part_name}_mvt.h5', 'df', mode='w', data_columns=True)

In [6]:
print(f'The dataset contains {ex_features.shape[0]} samples and {ex_features.shape[1]} features.')
print(f'The {len(participant.relevant_channels_ex)} relevant channels are located in the following locations:')
regions = [participant.channels_locations[i] for i in [channel.idx for channel in participant.relevant_channels_ex]]
channels_per_regions = {}
for region in set(regions):
    channels_per_regions[region] = regions.count(region)
print(channels_per_regions)

The dataset contains 128 samples and 4897 features.
The 68 relevant channels are located in the following locations:
{'paracentral': 3, 'WM_paracentral': 1, 'precentral': 26, 'insula': 3, 'WM_insula': 1, 'superiorfrontal': 5, 'caudalmiddlefrontal': 4, 'supramarginal': 3, 'postcentral': 16, 'WM_precentral': 6}


Let's create a baseline by taking the same number of channels, but without checking whether they are responsive:

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [7]:
use_saved = False
if use_saved and os.path.exists(f'saved/ex_baseline_features_{part_name}_mvt.h5'):
    ex_baseline_features = pd.read_hdf(f'saved/ex_baseline_features_{part_name}_mvt.h5', 'df')
else:
    ex_baseline_features = participant.get_features_all_sessions_rnd(len(participant.relevant_channels_ex), movtype='E')
    ex_baseline_features.to_hdf(f'saved/ex_baseline_features_{part_name}_mvt.h5', 'df', mode='w', data_columns=True)
use_saved = True

random_channels [<dataset.Channel object at 0x000001A5DA994E60>, <dataset.Channel object at 0x000001A5DA994F80>, <dataset.Channel object at 0x000001A5DA9950A0>, <dataset.Channel object at 0x000001A5DA9951C0>, <dataset.Channel object at 0x000001A5DA9952E0>, <dataset.Channel object at 0x000001A5DA995E20>, <dataset.Channel object at 0x000001A5DA996960>, <dataset.Channel object at 0x000001A5DA996BA0>, <dataset.Channel object at 0x000001A5DA997020>, <dataset.Channel object at 0x000001A5DA997260>, <dataset.Channel object at 0x000001A5DA9975C0>, <dataset.Channel object at 0x000001A5DA9976E0>, <dataset.Channel object at 0x000001A5DA997800>, <dataset.Channel object at 0x000001A5DA997C80>, <dataset.Channel object at 0x000001A5DA997DA0>, <dataset.Channel object at 0x000001A5DA9EC4A0>, <dataset.Channel object at 0x000001A5DA9ECB60>, <dataset.Channel object at 0x000001A5DA9ECC80>, <dataset.Channel object at 0x000001A5DA9ECDA0>, <dataset.Channel object at 0x000001A5DA9ECEC0>, <dataset.Channel object

  ex_baseline_features.to_hdf(f'saved/ex_baseline_features_{part_name}_mvt.h5', 'df', mode='w', data_columns=True)


In [8]:
print(f'The baseline dataset contains {ex_baseline_features.shape[0]} samples and {ex_baseline_features.shape[1]} features.')

The baseline dataset contains 128 samples and 2737 features.


### Train a model on the baseline features (Logistic Regression)

In [8]:
X = ex_baseline_features.drop('label', axis=1)
y = ex_baseline_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.64


## Let's now do the analysis for the responsive channels

In [30]:
X = ex_features.drop('label', axis=1)
y = ex_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

### Train a model (Logistic Regression)

In [31]:
logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### With PCA

In [12]:
logreg = LogisticRegressionModel(use_pca=True, expl_var=0.95)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


### Train a model (SVM)

In [13]:
svm = SVMModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.62


#### With PCA

In [14]:
svm = SVMModel(use_pca=True, expl_var=0.95)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.56


### Train a model (Random Forest)

In [15]:
svm = RandomForestModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.54


### Train a model (Multi-Layer Perceptron)

In [16]:
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=RANDOM_STATE)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [17]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

mlp = MLP(X_train.shape[1], 2, layers=(16, 16))
trainset = DfDataset(X_train, y_train)
valset = DfDataset(X_val, y_val)
train_loader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=4)
val_loader = DataLoader(valset, batch_size=4, shuffle=False, num_workers=4)

trainer = Trainer(mlp, 0.1, 20, 4, save_path='saved/mlp.pth', device=device)
trainer.train(train_loader, val_loader)

Epoch 1/20 - Training: 100%|██████████| 23/23 [00:03<00:00,  6.56it/s]


Epoch: 1 	Training Loss: 9.571020 	Training Acc: 0.505618


Epoch 1/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.73it/s]


Epoch: 1 	Validation Loss: 11.290998 	Validation Acc: 0.421053
Validation loss decreased (inf --> 11.290998). Saving model ...


Epoch 2/20 - Training: 100%|██████████| 23/23 [00:03<00:00,  7.56it/s]


Epoch: 2 	Training Loss: 7.463178 	Training Acc: 0.584270


Epoch 2/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.73it/s]


Epoch: 2 	Validation Loss: 10.900400 	Validation Acc: 0.578947
Validation loss decreased (11.290998 --> 10.900400). Saving model ...


Epoch 3/20 - Training: 100%|██████████| 23/23 [00:03<00:00,  7.52it/s]


Epoch: 3 	Training Loss: 0.629903 	Training Acc: 0.584270


Epoch 3/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.74it/s]


Epoch: 3 	Validation Loss: 0.687331 	Validation Acc: 0.578947
Validation loss decreased (10.900400 --> 0.687331). Saving model ...


Epoch 4/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  7.91it/s]


Epoch: 4 	Training Loss: 1.941021 	Training Acc: 0.573034


Epoch 4/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.80it/s]


Epoch: 4 	Validation Loss: 0.638111 	Validation Acc: 0.631579
Validation loss decreased (0.687331 --> 0.638111). Saving model ...


Epoch 5/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  7.71it/s]


Epoch: 5 	Training Loss: 0.633878 	Training Acc: 0.483146


Epoch 5/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.72it/s]


Epoch: 5 	Validation Loss: 0.644054 	Validation Acc: 0.631579


Epoch 6/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  7.72it/s]


Epoch: 6 	Training Loss: 0.633460 	Training Acc: 0.584270


Epoch 6/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.81it/s]


Epoch: 6 	Validation Loss: 0.638777 	Validation Acc: 0.631579


Epoch 7/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.36it/s]


Epoch: 7 	Training Loss: 0.632138 	Training Acc: 0.584270


Epoch 7/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.83it/s]


Epoch: 7 	Validation Loss: 0.649297 	Validation Acc: 0.631579


Epoch 8/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.01it/s]


Epoch: 8 	Training Loss: 0.647007 	Training Acc: 0.471910


Epoch 8/20 - Validation: 100%|██████████| 5/5 [00:03<00:00,  1.61it/s]


Epoch: 8 	Validation Loss: 0.636480 	Validation Acc: 0.631579
Validation loss decreased (0.638111 --> 0.636480). Saving model ...


Epoch 9/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.31it/s]


Epoch: 9 	Training Loss: 0.641056 	Training Acc: 0.494382


Epoch 9/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.89it/s]


Epoch: 9 	Validation Loss: 0.654278 	Validation Acc: 0.631579


Epoch 10/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.20it/s]


Epoch: 10 	Training Loss: 0.632320 	Training Acc: 0.584270


Epoch 10/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.93it/s]


Epoch: 10 	Validation Loss: 0.633112 	Validation Acc: 0.631579
Validation loss decreased (0.636480 --> 0.633112). Saving model ...


Epoch 11/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  7.83it/s]


Epoch: 11 	Training Loss: 0.633306 	Training Acc: 0.584270


Epoch 11/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.85it/s]


Epoch: 11 	Validation Loss: 0.647183 	Validation Acc: 0.631579


Epoch 12/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.31it/s]


Epoch: 12 	Training Loss: 0.638772 	Training Acc: 0.584270


Epoch 12/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.87it/s]


Epoch: 12 	Validation Loss: 0.640964 	Validation Acc: 0.631579


Epoch 13/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.39it/s]


Epoch: 13 	Training Loss: 0.633588 	Training Acc: 0.584270


Epoch 13/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.83it/s]


Epoch: 13 	Validation Loss: 0.640590 	Validation Acc: 0.631579


Epoch 14/20 - Training: 100%|██████████| 23/23 [00:03<00:00,  7.19it/s]


Epoch: 14 	Training Loss: 0.633105 	Training Acc: 0.584270


Epoch 14/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.72it/s]


Epoch: 14 	Validation Loss: 0.644015 	Validation Acc: 0.631579


Epoch 15/20 - Training: 100%|██████████| 23/23 [00:03<00:00,  7.62it/s]


Epoch: 15 	Training Loss: 0.637305 	Training Acc: 0.584270


Epoch 15/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.79it/s]


Epoch: 15 	Validation Loss: 0.638382 	Validation Acc: 0.631579


Epoch 16/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  7.67it/s]


Epoch: 16 	Training Loss: 0.647298 	Training Acc: 0.539326


Epoch 16/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.76it/s]


Epoch: 16 	Validation Loss: 0.656562 	Validation Acc: 0.631579


Epoch 17/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  7.90it/s]


Epoch: 17 	Training Loss: 0.642339 	Training Acc: 0.539326


Epoch 17/20 - Validation: 100%|██████████| 5/5 [00:03<00:00,  1.63it/s]


Epoch: 17 	Validation Loss: 0.647217 	Validation Acc: 0.631579


Epoch 18/20 - Training: 100%|██████████| 23/23 [00:03<00:00,  7.57it/s]


Epoch: 18 	Training Loss: 0.631066 	Training Acc: 0.584270


Epoch 18/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.74it/s]


Epoch: 18 	Validation Loss: 0.637460 	Validation Acc: 0.631579


Epoch 19/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.34it/s]


Epoch: 19 	Training Loss: 0.643950 	Training Acc: 0.471910


Epoch 19/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.85it/s]


Epoch: 19 	Validation Loss: 0.645797 	Validation Acc: 0.631579


Epoch 20/20 - Training: 100%|██████████| 23/23 [00:02<00:00,  8.03it/s]


Epoch: 20 	Training Loss: 0.649542 	Training Acc: 0.584270


Epoch 20/20 - Validation: 100%|██████████| 5/5 [00:02<00:00,  1.85it/s]

Epoch: 20 	Validation Loss: 0.644686 	Validation Acc: 0.631579





In [28]:
testset = DfDataset(X_test, y_test)
acc = 0
for input, label in testset:
    pred = trainer.model(input)
    if torch.argmax(pred) == label:
        acc += 1

acc /= len(testset)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.55


## Observation

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [9]:
if use_saved and os.path.exists(f'saved/obs_features_{part_name}_mvt.h5'):
    obs_features = pd.read_hdf(f'saved/obs_features_{part_name}_mvt.h5', 'df')
else:
    obs_features = participant.get_features_all_sessions_mvt('O')
    obs_features.to_hdf(f'saved/obs_features_{part_name}_mvt.h5', 'df', mode='w', data_columns=True)

In [10]:
print(f'The dataset contains {obs_features.shape[0]} samples and {obs_features.shape[1]} features.')

The dataset contains 128 samples and 3673 features.


Let's create a baseline by taking the same number of channels, but without checking whether they are responsive:

In [11]:
use_saved = False
if use_saved and os.path.exists(f'saved/obs_baseline_features_{part_name}_mvt.h5'):
    obs_baseline_features = pd.read_hdf(f'saved/obs_baseline_features_{part_name}_mvt.h5', 'df')
else:
    obs_baseline_features = participant.get_features_all_sessions_rnd(len(participant.relevant_channels_obs), movtype='O')
    obs_baseline_features.to_hdf(f'saved/obs_baseline_features_{part_name}_mvt.h5', 'df', mode='w', data_columns=True)
use_saved = True

random_channels [<dataset.Channel object at 0x000001A5DA994E60>, <dataset.Channel object at 0x000001A5DA994F80>, <dataset.Channel object at 0x000001A5DA9950A0>, <dataset.Channel object at 0x000001A5DA9951C0>, <dataset.Channel object at 0x000001A5DA9952E0>, <dataset.Channel object at 0x000001A5DA995E20>, <dataset.Channel object at 0x000001A5DA996960>, <dataset.Channel object at 0x000001A5DA996BA0>, <dataset.Channel object at 0x000001A5DA997020>, <dataset.Channel object at 0x000001A5DA997260>, <dataset.Channel object at 0x000001A5DA9975C0>, <dataset.Channel object at 0x000001A5DA9976E0>, <dataset.Channel object at 0x000001A5DA997800>, <dataset.Channel object at 0x000001A5DA997C80>, <dataset.Channel object at 0x000001A5DA997DA0>, <dataset.Channel object at 0x000001A5DA9EC4A0>, <dataset.Channel object at 0x000001A5DA9ECB60>, <dataset.Channel object at 0x000001A5DA9ECC80>, <dataset.Channel object at 0x000001A5DA9ECDA0>, <dataset.Channel object at 0x000001A5DA9ECEC0>, <dataset.Channel object

  obs_baseline_features.to_hdf(f'saved/obs_baseline_features_{part_name}_mvt.h5', 'df', mode='w', data_columns=True)


In [14]:
print(f'The baseline dataset contains {ex_baseline_features.shape[0]} samples and {ex_baseline_features.shape[1]} features.')

The baseline dataset contains 128 samples and 2737 features.


### Train a model on the baseline features (Logistic Regression)

In [15]:
X = obs_baseline_features.drop('label', axis=1)
y = obs_baseline_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.49


## Let's now do the analysis for the responsive channels

In [16]:
X = obs_features.drop('label', axis=1)
y = obs_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

### Train a model (Logistic Regression)

In [17]:
logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.62


#### With PCA

In [33]:
logreg = LogisticRegressionModel(use_pca=True, expl_var=0.95)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


### Train a model (SVM)

In [34]:
svm = SVMModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


#### With PCA

In [35]:
svm = SVMModel(use_pca=True, expl_var=0.95)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.56


### Train a model (Random Forest)

In [36]:
svm = RandomForestModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.59
