In [1]:
import sys 
sys.path.append('../')

from utils.dataset import EEGDataset
from pathlib import Path


eeg_dir = Path('../EEGDataset/raw')
subjects = ['sub-01', 'sub-02', 'sub-03', 'sub-04']

data = EEGDataset(eeg_dir, subjects)

#### Load data

In [2]:
import numpy as np
from scipy import stats

# Extract whole dataset
X = []
y = []
for sample in data:
    X.append(sample['eeg'])
    y.append(sample['label'])

X = np.array(X)
y = np.array(y)

#### Extract features

In [3]:
# Extract features

def extract_features(X, features):

    X_feat = [feat(X) for feat in features]
    X_feat = np.concatenate(X_feat, axis=-1)

    return X_feat 


def mean(X):
    return np.mean(X, axis=-1)

def std(X):
    return np.std(X, axis=-1)

def ptp(X):
    return np.ptp(X, axis=-1)

def var(X):
    return np.var(X, axis=-1)

def minim(X):
    return np.min(X, axis=-1)

def maxim(X):
    return np.max(X, axis=-1)

def argminim(X):
    return np.argmin(X, axis=-1)

def argmaxim(X):
    return np.argmax(X, axis=-1)

def rms(X):
    return np.sqrt(np.mean(X**2, axis=-1))

def abs_diff_signal(X):
    return np.sum(np.abs(np.diff(X, axis=-1)), axis=-1)

def skewness(X):
    return stats.skew(X, axis=-1)

def kurtosis(X):
    return stats.kurtosis(X, axis=-1)


In [4]:
features = [mean, std, ptp, var, minim, maxim, argminim, argmaxim, rms,
            abs_diff_signal, skewness, kurtosis]

X_feat = extract_features(X, features)
print(X_feat.shape) # (n_samples, n_channels*n_features)

(2225, 1536)


#### Train models

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold, GridSearchCV 

Logistic Regression:

In [6]:
clf = LogisticRegression()
pipe = Pipeline([('scaler',StandardScaler()), ('clf',clf)])
param_grid = {'clf__C':[0.1,0.5,0.7,1,3,5,7]}
gscv = GridSearchCV(pipe, param_grid, cv=5, n_jobs=12)
gscv.fit(X_feat, y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [7]:
gscv.cv_results_['mean_test_score']

array([0.70247191, 0.68808989, 0.68764045, 0.6858427 , 0.68494382,
       0.68808989, 0.69078652])

Support vector machine:

In [8]:
clf = SVC(gamma='auto')
pipe = Pipeline([('scaler',StandardScaler()),('clf',clf)])
param_grid = {'clf__C':[0.1,0.5,0.7,1,3,5,7]}
gscv = GridSearchCV(pipe, param_grid, cv=5, n_jobs=12)
gscv.fit(X_feat, y)

In [9]:
gscv.cv_results_['mean_test_score']

array([0.66202247, 0.72269663, 0.73573034, 0.7411236 , 0.75820225,
       0.75685393, 0.75640449])