In [16]:
from utils import *
from dataset import *
from constants import *
from models.BaseModels import *
from models.DeepModels import *
from models.DeepUtils import *

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from dataset import Participant
from torch.utils.data import DataLoader

random.seed(RANDOM_STATE)

In [2]:
use_saved = True

In [3]:
if use_saved:
    participant = Participant.load_from_pickle('saved/s6.pkl')
else:
    participant = Participant('s6', data_path=DATA_PATH_NOTEBOOK, alpha=0.05)
    with open('saved/s6.pkl', 'wb') as f:
        pickle.dump(participant, f, pickle.HIGHEST_PROTOCOL)

In [4]:
test_size = 0.3
pca_expl_var = 0.95

## Execution

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [17]:
use_saved = False
if use_saved:
    ex_features = pd.read_hdf('saved/ex_features.h5', 'df')
else:
    ex_features = participant.get_features_all_sessions_mvt('E')
    ex_features.to_hdf('saved/ex_features.h5', 'df', mode='w', data_columns=True)

100%|██████████| 256/256 [09:41<00:00,  2.27s/it]
  ex_features.to_hdf('saved/ex_features.h5', 'df', mode='w', data_columns=True)


In [6]:
print(f'The dataset contains {ex_features.shape[0]} samples and {ex_features.shape[1]} features.')
print(f'The {len(participant.relevant_channels_bigsmall)} relevant channels are located in the following locations:')
regions = [participant.channels_locations[i] for i in [channel.idx for channel in participant.relevant_channels_bigsmall]]
channels_per_regions = {}
for region in set(regions):
    channels_per_regions[region] = regions.count(region)
print(channels_per_regions)

The dataset contains 128 samples and 4321 features.
The 60 relevant channels are located in the following locations:
{'precentral': 21, 'insula': 3, 'paracentral': 2, 'superiorfrontal': 6, 'caudalmiddlefrontal': 6, 'supramarginal': 2, 'postcentral': 14, 'WM_precentral': 6}


Let's create a baseline by taking the same number of channels, but without checking whether they are responsive:

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [7]:
if use_saved:
    ex_baseline_features = pd.read_hdf('saved/ex_baseline_features.h5', 'df')
else:
    ex_baseline_features = participant.get_features_all_sessions_rnd(len(participant.relevant_channels_ex), movtype='E')
    ex_baseline_features.to_hdf('saved/ex_baseline_features.h5', 'df', mode='w', data_columns=True)

In [8]:
print(f'The baseline dataset contains {ex_baseline_features.shape[0]} samples and {ex_baseline_features.shape[1]} features.')

The baseline dataset contains 128 samples and 4897 features.


### Train a model on the baseline features (Logistic Regression)

In [None]:
X = ex_baseline_features.drop('label', axis=1)
y = ex_baseline_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

## Let's now do the analysis for the responsive channels

In [19]:
X = ex_features.drop('label', axis=1)
y = ex_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

### Train a model (Logistic Regression)

In [21]:
logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.67


#### With PCA

In [22]:
logreg = LogisticRegressionModel(use_pca=True, expl_var=0.95)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.67


### Train a model (SVM)

In [23]:
svm = SVMModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


#### With PCA

In [24]:
svm = SVMModel(use_pca=True, expl_var=0.95)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


### Train a model (Random Forest)

In [25]:
svm = RandomForestModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


### Train a model (Multi-Layer Perceptron)

In [26]:
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=RANDOM_STATE)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

mlp = MLP(X_train.shape[1], 2, layers=(16, 16))
trainset = DfDataset(X_train, y_train)
valset = DfDataset(X_val, y_val)
train_loader = DataLoader(trainset, batch_size=4, shuffle=True, num_workers=4)
val_loader = DataLoader(valset, batch_size=4, shuffle=False, num_workers=4)

trainer = Trainer(mlp, 0.001, 20, 4, save_path='saved/mlp.pth', device=device)
trainer.train(train_loader, val_loader)

In [26]:
testset = DfDataset(X_test, y_test)
acc = 0
for input, label in testset:
    pred = trainer.model(input)
    if torch.argmax(pred) == label:
        acc += 1

acc /= len(testset)
print(f"Accuracy: {acc:.2f}")

Accuracy: 0.40


## Observation

<span style='color: red'>WARNING</span>: computing all features takes around 7 minutes (Intel Core i7-7700K)

In [None]:
if use_saved:
    obs_features = pd.read_hdf('saved/obs_features.h5', 'df')
else:
    obs_features = participant.get_features_all_sessions_mvt('O')
    obs_features.to_hdf('saved/obs_features.h5', 'df', mode='w', data_columns=True)

100%|██████████| 51/51 [00:03<00:00, 14.79it/s]
100%|██████████| 51/51 [00:02<00:00, 19.41it/s]
100%|██████████| 51/51 [00:04<00:00, 12.15it/s]
100%|██████████| 51/51 [00:03<00:00, 16.93it/s]
100%|██████████| 51/51 [00:03<00:00, 16.81it/s]
100%|██████████| 51/51 [00:02<00:00, 20.05it/s]
100%|██████████| 51/51 [00:02<00:00, 23.71it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.41it/s]]
100%|██████████| 51/51 [00:03<00:00, 13.51it/s]]
100%|██████████| 51/51 [00:02<00:00, 17.20it/s]]
100%|██████████| 51/51 [00:03<00:00, 14.76it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.37it/s]]
100%|██████████| 51/51 [00:02<00:00, 23.64it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.34it/s]]
100%|██████████| 51/51 [00:03<00:00, 13.45it/s]]
100%|██████████| 51/51 [00:02<00:00, 20.16it/s]]
100%|██████████| 51/51 [00:02<00:00, 18.51it/s]]
100%|██████████| 51/51 [00:02<00:00, 22.31it/s]]
100%|██████████| 51/51 [00:02<00:00, 17.97it/s]]
100%|██████████| 51/51 [00:02<00:00, 20.70it/s]]
100%|██████████| 51/51 [00

In [None]:
print(f'The dataset contains {obs_features.shape[0]} samples and {obs_features.shape[1]} features.')

The dataset contains 128 samples and 2881 features.


## Let's now do the analysis for the responsive channels

In [None]:
X = obs_features.drop('label', axis=1)
y = obs_features['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=RANDOM_STATE)

### Train a model (Logistic Regression)

In [None]:
logreg = LogisticRegressionModel()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


#### With PCA

In [None]:
logreg = LogisticRegressionModel(use_pca=True, expl_var=0.95)
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.69


### Train a model (SVM)

In [None]:
svm = SVMModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.62


#### With PCA

In [None]:
svm = SVMModel(use_pca=True, expl_var=0.95)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.51


### Train a model (Random Forest)

In [None]:
svm = RandomForestModel()
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.54
