In [2]:
import sys 
sys.path.append('../')

from utils.dataset import EEGDataset
from pathlib import Path


eeg_dir = Path('/home/admin/work/NetworkMachineLearning_2023/EEGDataset/')
subjects = ['sub-01', 'sub-02', 'sub-03', 'sub-04']

data = EEGDataset(eeg_dir, subjects)

#### Load data

In [3]:
import numpy as np
from scipy import stats

# Extract whole dataset
X = []
y = []
for sample in data:
    X.append(sample['eeg'])
    y.append(sample['label'])

X = np.array(X)
y = np.array(y)

#### Extract features

In [4]:
# Extract features

def extract_features(X, features):

    X_feat = [feat(X) for feat in features]
    X_feat = np.concatenate(X_feat, axis=-1)

    return X_feat 


def mean(X):
    return np.mean(X, axis=-1)

def std(X):
    return np.std(X, axis=-1)

def ptp(X):
    return np.ptp(X, axis=-1)

def var(X):
    return np.var(X, axis=-1)

def minim(X):
    return np.min(X, axis=-1)

def maxim(X):
    return np.max(X, axis=-1)

def argminim(X):
    return np.argmin(X, axis=-1)

def argmaxim(X):
    return np.argmax(X, axis=-1)

def rms(X):
    return np.sqrt(np.mean(X**2, axis=-1))

def abs_diff_signal(X):
    return np.sum(np.abs(np.diff(X, axis=-1)), axis=-1)

def skewness(X):
    return stats.skew(X, axis=-1)

def kurtosis(X):
    return stats.kurtosis(X, axis=-1)


In [5]:
features = [mean, std, ptp, var, minim, maxim, argminim, argmaxim, rms,
            abs_diff_signal, skewness, kurtosis]

X_feat = extract_features(X, features)
print(X_feat.shape) # (n_samples, n_channels*n_features)

(2225, 1536)


#### Train models

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline 
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold, GridSearchCV 

Logistic Regression:

In [7]:
clf = LogisticRegression()
pipe = Pipeline([('scaler',StandardScaler()), ('clf',clf)])
param_grid = {'clf__C':[0.1,0.5,0.7,1,3,5,7]}
gscv = GridSearchCV(pipe, param_grid, cv=5, n_jobs=12)
gscv.fit(X_feat, y)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [9]:
gscv.cv_results_['mean_test_score']

array([0.60853933, 0.59865169, 0.59820225, 0.6       , 0.59910112,
       0.6       , 0.60224719])

Support vector machine:

In [10]:
clf = SVC(gamma='auto')
pipe = Pipeline([('scaler',StandardScaler()),('clf',clf)])
param_grid = {'clf__C':[0.1,0.5,0.7,1,3,5,7]}
gscv = GridSearchCV(pipe, param_grid, cv=5, n_jobs=12)
gscv.fit(X_feat, y)

In [11]:
gscv.cv_results_['mean_test_score']

array([0.56764045, 0.62606742, 0.64179775, 0.65707865, 0.67101124,
       0.67011236, 0.67146067])

Multilayer perceptron

In [31]:
import torch 
import torch.nn as nn 
from sklearn.model_selection import train_test_split

class Net(nn.Module):
    def __init__(self, input_size, n_layers):
        super().__init__()

        self.first_layer = nn.Sequential(
            nn.Linear(input_size, 100),
            nn.LeakyReLU(),
        )

        self.hidden_layers = []
        for _ in range(n_layers):
            self.hidden_layers.append(nn.Sequential(
                nn.Linear(100, 100),
                nn.LeakyReLU(),
            ))

        self.last_layer = nn.Sequential(
            nn.Linear(100, 2),
        )

    def forward(self, x):

        x = self.first_layer(x)
        
        for layer in self.hidden_layers:
            x = layer(x)

        x = self.last_layer(x)

        return x

In [35]:
def train(model, X, y, loss_fn, optimizer):

    model.train()

    y_pred = model(X)
    loss = loss_fn(y_pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


def valid(model, X, y, loss_fn):

    model.eval()

    val_loss, correct = 0, 0
    with torch.no_grad():
        pred = model(X)
        val_loss = loss_fn(pred, y).item
        correct = (pred.argmax(1) == y)/len(y)

    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {val_loss:>8f} \n")

    

In [36]:
model = Net(input_size=X_feat.shape[1], n_layers=5)

X_train, X_test, y_train, y_test = train_test_split(X_feat, y, test_size=0.1)

scaler = StandardScaler()

X_train = torch.Tensor(scaler.fit_transform(X_train)).float()
y_train = torch.Tensor(y_train).float()
X_test  = torch.Tensor(scaler.transform(X_test)).float()
y_test  = torch.Tensor(y_train).float()



loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(model, X_train, y_train, loss_fn, optimizer)
    valid(model, X_test, y_test, loss_fn)
print("Done!")


Epoch 1
-------------------------------


RuntimeError: expected scalar type Long but found Float

In [29]:
X_test.shape

torch.Size([223, 1536])