### Импорт библиотек

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import mne
import warnings
import torch
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

from sklearn.decomposition import PCA

from pyriemann.estimation import Covariances
from pyriemann.estimation import XdawnCovariances
from pyriemann.tangentspace import TangentSpace
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

#%load_ext autoreload
#%autoreload 2
# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')

#from google.colab import drive
#drive.mount('/content/drive')

In [2]:
#!pip install mne
#!pip install pyriemann

In [3]:
#!pipreqs . --force

## 1) Получаем набор данных и проводим аугментацию

### eyes open/eyes closed on EEG Motor Movement/Imagery Dataset

- EEG Motor Movement датасет состоит из 109 испытуемых
- Данные каждого испытуемого состоят из 14 выполненных тестов
- Мы сейчас сосредоточимся на заданиях 1 и 2 с классификацией открытых/закрытых глаз
- Задание 1 $-$ запись (run) с открытыми глазами
- Задание 2 $-$ запись (run) с закрытыми глазами

Subject_1

- Число каналов 64
- Размер выборки 9760
- Частота дискретизации 160 Гц

In [4]:
class data():
    """Loader for runs R01/R02 of the EEG Motor Movement/Imagery dataset.

    Attributes set by the methods:
        data: object array of shape (n_recordings, 2); each row is
            ``(signal, label)`` where ``signal`` is the array returned by
            ``Raw.get_data()`` and ``label`` is 0 for run R01 and 1 for run R02.
        augmented_data: object array of shape (n_windows, 2) holding
            ``(window, label)`` rows; every window is exactly ``window_size``
            samples wide.
    """

    def __init__(self):
        pass

    @staticmethod
    def _pairs_to_array(pairs):
        """Pack ``(array, label)`` pairs into an (n, 2) object array.

        Building the array cell by cell avoids relying on NumPy's implicit
        handling of ragged nested sequences, which raises in recent versions.
        """
        packed = np.empty((len(pairs), 2), dtype=object)
        for row, (signal, label) in enumerate(pairs):
            packed[row, 0] = signal
            packed[row, 1] = label
        return packed

    def get_data(self):
        """Read runs R01 (label 0) and R02 (label 1) for subjects 1..109 into ``self.data``."""
        # Dataset root relative to the parent of the working directory.
        # On Colab, swap the first argument for '/content/drive/My Drive/7_sem/diplom/'.
        base_dir = os.path.join(os.path.dirname(os.getcwd()), 'code',
                                'eeg-motor-movementimagery-dataset', 'files')

        records = []
        for subject in range(1, 110):
            subject_id = f'S{subject:03d}'  # zero-padded to three digits, e.g. S007
            for label, run in enumerate(('R01', 'R02')):
                path = os.path.join(base_dir, subject_id, subject_id + run + '.edf')
                signal = mne.io.read_raw_edf(path, preload=True, verbose=False).get_data()
                records.append((signal, label))
        self.data = self._pairs_to_array(records)

    # Augmentation
    def get_augmented_data(self, window_size = 610):
        """Cut every recording into non-overlapping windows of ``window_size`` samples.

        Calls ``get_data()`` first, then stores the result in
        ``self.augmented_data``. A trailing remainder shorter than
        ``window_size`` is dropped so that all windows share one shape.

        Args:
            window_size: Number of samples (second axis) per window.
        """
        self.get_data()

        windows = []
        for time_series, label in self.data:
            n_samples = time_series.shape[1]
            # Only start positions that leave room for a full window are visited.
            for start in range(0, n_samples - window_size + 1, window_size):
                windows.append((time_series[:, start:start + window_size], label))

        self.augmented_data = self._pairs_to_array(windows)

`TODO Стоит добавить еще аугментаций и изучить методы аугментаций временных рядов`

In [5]:
# Load the recordings and cut them into fixed-size windows (default window_size).
dataset = data()
dataset.get_augmented_data()
# Array of (window, label) rows produced by get_augmented_data().
augmented_data = dataset.augmented_data

In [6]:
# Shape of the augmented set: (number of windows, 2).
augmented_data.shape

(3483, 2)

In [7]:
# Separate the (window, label) rows into a stacked signal array and a label vector.
ts = np.array([window for window, _ in augmented_data])
y = np.array([label for _, label in augmented_data])

### Разделим данные на тренировочные и тестовые

In [8]:
# Random 80/20 split of individual windows with a fixed seed.
# NOTE(review): windows cut from the same recording can end up in both splits, which may
# inflate test scores — consider splitting by subject/recording instead; confirm intent.
ts_train, ts_test, y_train, y_test = train_test_split(ts, y, test_size=0.2, random_state=42)

In [9]:
# Class balance of the training split; labels are 0/1, so sum(1 - y) counts class 0.
print(f'In train data class 0: {np.sum(1 - y_train)}, class 1: {np.sum(y_train)}')

In train data class 0: 1392, class 1: 1394


## Выпрямим в вектор временные ряды и применим SVM

In [10]:
# Collapse every window into one flat feature vector per trial (row-major order).
ts_train_flatten = ts_train.reshape(len(ts_train), -1)
ts_test_flatten = ts_test.reshape(len(ts_test), -1)

In [11]:
# (number of training windows, flattened features per window)
ts_train_flatten.shape

(2786, 39040)

In [15]:
# Baseline: RBF-kernel SVM on the raw flattened windows.
clf = SVC(kernel='rbf')

# Cross-validated score on the training split (cross_val_score defaults for folds and scoring).
accuracy = cross_val_score(clf, ts_train_flatten, y_train)

print(accuracy.mean())

0.7322381163812797


In [16]:
# Refit the baseline SVM on the whole training split and predict the held-out windows.
clf = SVC(kernel='rbf')
clf.fit(ts_train_flatten, y_train)

y_pred = clf.predict(ts_test_flatten)

In [17]:
# Test-set accuracy and F1 of the baseline SVM, rounded to three decimals.
print(f'Accuracy on test {round(accuracy_score(y_test, y_pred), 3)}, f1-score on test {round(f1_score(y_test, y_pred), 3)}')

Accuracy on test 0.697, f1-score on test 0.731


## Воспользуемся библиотекой pyriemann

### Перейдем в касательное пространство и воспользуемся классическими методами классификации

In [13]:
ts_train.shape # (n_trials, n_channels, n_samples)

(2786, 64, 610)

In [14]:
# Test-set metrics keyed by model name; filled in by the cells below.
F1_score = {}
Accuracy = {}

### 1) SVM

In [None]:
# Pipeline: per-trial covariance estimation -> tangent-space projection -> RBF SVM.
covest = Covariances()
# NOTE(review): this rebinds `ts`, which earlier held the stacked windows; re-running the
# train/test split cell after this point will no longer find the data under that name.
ts = TangentSpace()
svc = SVC(kernel='rbf')

clf = make_pipeline(covest,ts,svc)
# Cross-validated score on the training split.
accuracy = cross_val_score(clf, ts_train, y_train)

print(accuracy.mean())

0.9393377219230002


In [None]:
# Rebuild the pipeline from the estimators defined in the previous cell, fit it on the
# whole training split and predict the held-out windows.
clf = make_pipeline(covest,ts,svc)
clf.fit(ts_train, y_train)

y_pred = clf.predict(ts_test)

In [None]:
print(f'Accuracy on test {round(accuracy_score(y_test, y_pred), 3)}, f1-score on test {round(f1_score(y_test, y_pred), 3)}')

# Store the rounded test metrics under the 'SVM' key.
Accuracy['SVM'] = round(accuracy_score(y_test, y_pred), 3)
F1_score['SVM'] = round(f1_score(y_test, y_pred), 3)

Accuracy on test 0.954, f1-score on test 0.954


Аналогичным образом попробуем другие классические методы классификации после перехода в касательное пространство

### 2) LogisticRegression

In [None]:
# Pipeline: per-trial covariance estimation -> tangent-space projection -> logistic regression.
covest = Covariances()
ts = TangentSpace()
logreg = LogisticRegression()

clf = make_pipeline(covest,ts, logreg)
# Cross-validated score on the training split.
accuracy = cross_val_score(clf, ts_train, y_train)

print(accuracy.mean())

0.913853014420571


In [None]:
# Fit the logistic-regression pipeline on the whole training split and predict the test windows.
clf = make_pipeline(covest,ts,logreg)
clf.fit(ts_train, y_train)

y_pred = clf.predict(ts_test)

In [None]:
print(f'Accuracy on test {round(accuracy_score(y_test, y_pred), 3)}, f1-score on test {round(f1_score(y_test, y_pred), 3)}')

# Store the rounded test metrics under the 'LogisticRegression' key.
Accuracy['LogisticRegression'] = round(accuracy_score(y_test, y_pred), 3)
F1_score['LogisticRegression'] = round(f1_score(y_test, y_pred), 3)

Accuracy on test 0.91, f1-score on test 0.909


### 3) RandomForestClassifier

In [None]:
# Pipeline: per-trial covariance estimation -> tangent-space projection -> random forest (200 trees).
covest = Covariances()
ts = TangentSpace()
rf= RandomForestClassifier(n_estimators=200)

clf = make_pipeline(covest,ts, rf)
# Cross-validated score on the training split.
accuracy = cross_val_score(clf, ts_train, y_train)
print(accuracy.mean())

0.8797519996396467


In [None]:
# Fit the random-forest pipeline on the whole training split and predict the test windows.
clf = make_pipeline(covest,ts,rf)
clf.fit(ts_train, y_train)

y_pred = clf.predict(ts_test)

In [None]:
print(f'Accuracy on test {round(accuracy_score(y_test, y_pred), 3)}, f1-score on test {round(f1_score(y_test, y_pred), 3)}')

# Store the rounded test metrics under the 'RandomForestClassifier' key.
Accuracy['RandomForestClassifier'] = round(accuracy_score(y_test, y_pred), 3)
F1_score['RandomForestClassifier'] = round(f1_score(y_test, y_pred), 3)

Accuracy on test 0.9, f1-score on test 0.901


### 4) Добавим фильтрацию XdawnCovariances


In [None]:
# SVM on XdawnCovariances features projected to the tangent space (Riemannian metric).
covest = XdawnCovariances()
ts = TangentSpace(metric='riemann')
svc = SVC(kernel='rbf')

clf = make_pipeline(covest,ts,svc)
# Cross-validated score on the training split.
accuracy = cross_val_score(clf, ts_train, y_train)

print(accuracy.mean())

0.7738647259061923


In [None]:
# LogisticRegression on XdawnCovariances features projected to the tangent space.
covest = XdawnCovariances()
ts = TangentSpace()
logreg = LogisticRegression()

clf = make_pipeline(covest, ts, logreg)
# Cross-validated score on the training split.
accuracy = cross_val_score(clf, ts_train, y_train)

print(accuracy.mean())

0.7282909596339839


In [None]:
# Random forest (200 trees) on XdawnCovariances features projected to the tangent space.
covest = XdawnCovariances()
ts = TangentSpace()
rf= RandomForestClassifier(n_estimators=200)

clf = make_pipeline(covest,ts, rf)
# Cross-validated score on the training split.
accuracy = cross_val_score(clf, ts_train, y_train)
print(accuracy.mean())

0.7447848497133261


### 5) Применим PCA

In [15]:
# SVM with a PCA step between the tangent-space projection and the classifier.
covest = Covariances()
ts = TangentSpace(metric='riemann')
# Keep 1000 principal components of the tangent-space features.
pca = PCA(1000)
svc = SVC(kernel='rbf')

clf = make_pipeline(covest,ts,pca,svc)
# Cross-validated score on the training split.
accuracy = cross_val_score(clf, ts_train, y_train)

print(accuracy.mean())

0.9411298366183407


In [19]:
# Fit the PCA + SVM pipeline on the whole training split and predict the test windows.
clf = make_pipeline(covest,ts,pca,svc)
clf.fit(ts_train, y_train)

y_pred = clf.predict(ts_test)

In [20]:
print(f'Accuracy on test {round(accuracy_score(y_test, y_pred), 3)}, f1-score on test {round(f1_score(y_test, y_pred), 3)}')

# Store the rounded test metrics under the 'SVM_pca' key.
Accuracy['SVM_pca'] = round(accuracy_score(y_test, y_pred), 3)
F1_score['SVM_pca'] = round(f1_score(y_test, y_pred), 3)

Accuracy on test 0.954, f1-score on test 0.954


### 6) Полносвязная нейронная сеть

In [16]:
# Feature extraction for the neural networks: covariance estimation -> tangent space.
covest = Covariances()
ts = TangentSpace(metric='riemann')
preprocess = make_pipeline(covest,ts)
# Fit the transform on the training split only, then apply it to both splits.
preprocess.fit(ts_train, y_train)

X_train = preprocess.transform(ts_train)
X_test = preprocess.transform(ts_test)

In [17]:
def get_loader(X, y, batch_size=64, shuffle=False):
    """Wrap NumPy features and labels in a batched ``DataLoader``.

    Args:
        X: NumPy array of features; converted to ``float32`` tensors.
        y: NumPy array of integer class labels; converted to ``int64`` tensors.
        batch_size: Number of samples per batch.
        shuffle: Reshuffle the samples on every pass. The default ``False``
            keeps the fixed sample order; pass ``True`` for a training loader.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(inputs, labels)`` batches.
    """
    dataset = torch.utils.data.TensorDataset(torch.from_numpy(X).float(),
                                             torch.from_numpy(y).long())
    return torch.utils.data.DataLoader(dataset,
                                       batch_size=batch_size,
                                       shuffle=shuffle)

In [21]:
def count_parameters(model):
    """Return the number of trainable scalar parameters of ``model``.

    Only parameters with ``requires_grad=True`` are counted.
    """
    # numel() works on any device; converting through .numpy() raises for CUDA tensors.
    return sum(param.numel() for param in model.parameters()
               if param.requires_grad)

In [22]:
def train_epoch(model, optimizer, train_loader, criterion, device):
    """Run one optimisation pass over ``train_loader``, updating ``model`` in place.

    Args:
        model: Network to train; switched to training mode.
        optimizer: Optimizer bound to the model's parameters.
        train_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batches are moved to before the forward pass.
    """
    model.train()

    for inputs, targets in train_loader:
        # Move the batch to the device the model is expected to live on.
        inputs, targets = inputs.to(device), targets.to(device)

        # Forward pass and loss; labels are cast to int64 before the criterion sees them.
        logits = model(inputs)
        batch_loss = criterion(logits, targets.long())

        # Gradients accumulate across backward() calls, so clear them before this one.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()


def evaluate_loss_acc(loader, model, criterion, device):
    """Compute the per-sample mean loss and the accuracy of ``model`` over ``loader``.

    Args:
        loader: Iterable of ``(inputs, labels)`` batches.
        model: Network to evaluate; switched to eval mode.
        criterion: Loss called as ``criterion(predictions, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, acc)`` of zero-dimensional tensors.
    """
    model.eval()  # Set model to eval mode
    loss, true_preds, num_preds = 0., 0., 0.

    with torch.no_grad():  # No gradients are needed for evaluation
        for data_inputs, data_labels in loader:
            data_inputs, data_labels = data_inputs.to(device), data_labels.to(device)
            preds = model(data_inputs)
            batch_size = data_labels.shape[0]

            # Assumes the criterion returns a batch mean (as CrossEntropyLoss does by
            # default); weighting by batch size makes the final value a per-sample mean.
            loss += criterion(preds, data_labels.long()) * batch_size

            # The arg-max of the raw scores is the predicted class; a softmax is
            # monotonic and would not change which class wins.
            pred_labels = torch.argmax(preds, dim=1)

            # true_preds counts correct predictions, num_preds counts all samples.
            true_preds += (pred_labels == data_labels).sum()
            num_preds += batch_size

    acc = true_preds / num_preds
    loss = loss / num_preds
    return (loss, acc)



def train(model, opt, train_loader, test_loader, criterion, n_epochs, \
          device, verbose=True):
    """Train ``model`` for ``n_epochs`` and record loss/accuracy after every epoch.

    After each training pass the model is evaluated on both loaders.

    Returns:
        Four lists with one CPU tensor per epoch:
        ``(train_log, train_acc_log, val_log, val_acc_log)``.
    """
    train_log, train_acc_log = [], []
    val_log, val_acc_log = [], []
    report = ('Epoch [%d/%d], Loss (train/test): %.4f/%.4f,'
              ' Acc (train/test): %.4f/%.4f')

    for epoch_number in range(1, n_epochs + 1):
        train_epoch(model, opt, train_loader, criterion, device)

        train_loss, train_acc = evaluate_loss_acc(train_loader, model,
                                                  criterion, device)
        val_loss, val_acc = evaluate_loss_acc(test_loader, model,
                                              criterion, device)

        # Histories hold CPU copies of the metrics.
        histories = ((train_log, train_loss), (train_acc_log, train_acc),
                     (val_log, val_loss), (val_acc_log, val_acc))
        for history, value in histories:
            history.append(value.cpu())

        if verbose:
            print(report % (epoch_number, n_epochs,
                            train_loss, val_loss, train_acc, val_acc))

    return train_log, train_acc_log, val_log, val_acc_log

---
Обучим нейронную сеть

In [32]:
class Net(nn.Module):
    """Fully connected classifier: four hidden ReLU layers and a linear output layer.

    Args:
        in_features: Width of the input vectors. The default 2080 is the
            tangent-space dimension for 64 channels (64 * 65 / 2).
        n_classes: Number of output scores per sample.
    """

    def __init__(self, in_features=2080, n_classes=2):
        super().__init__()

        # Layer attribute names are kept so existing state_dict keys stay valid.
        self.layer1 = nn.Sequential(
            nn.Linear(in_features, 1040),
            nn.ReLU())
        self.layer2 = nn.Sequential(
            nn.Linear(1040, 500),
            nn.ReLU())
        self.layer3 = nn.Sequential(
            nn.Linear(500, 100),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Linear(100, 20),
            nn.ReLU())
        self.layer5 = nn.Linear(20, n_classes)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, n_classes)."""
        for layer in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = layer(x)
        return self.layer5(x)

In [37]:
# Fresh, randomly initialised fully connected network.
net = Net()

In [38]:
# CrossEntropyLoss takes raw scores and applies the softmax internally.
criterion = nn.CrossEntropyLoss() # loss includes softmax
# Plain SGD with learning rate 0.1; the Adam alternative is kept below for reference.
optimizer = optim.SGD(net.parameters(), lr=0.1)
#optimizer = optim.Adam(net.parameters(), lr=0.001)

In [39]:
# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

net = net.to(device)

In [40]:
# Number of passes over the training loader.
n_epochs = 30

# Both loaders are built from the tangent-space features in batches of 64.
train_loader = get_loader(X_train, y_train, batch_size=64)
test_loader = get_loader(X_test, y_test, batch_size=64)

# Train and keep the per-epoch loss/accuracy histories for both loaders.
train_log, train_acc_log, test_log, test_acc_log = train(net, optimizer, train_loader, test_loader, criterion, n_epochs, device, verbose=True)

Epoch [1/30], Loss (train/test): 0.6899/0.6905, Acc (train/test): 0.5782/0.5567
Epoch [2/30], Loss (train/test): 0.6798/0.6814, Acc (train/test): 0.7279/0.7059
Epoch [3/30], Loss (train/test): 0.6171/0.6243, Acc (train/test): 0.7976/0.7819
Epoch [4/30], Loss (train/test): 0.4085/0.4734, Acc (train/test): 0.8195/0.7862
Epoch [5/30], Loss (train/test): 0.5060/0.6320, Acc (train/test): 0.8324/0.7891
Epoch [6/30], Loss (train/test): 0.2269/0.3689, Acc (train/test): 0.9142/0.8623
Epoch [7/30], Loss (train/test): 0.0597/0.1664, Acc (train/test): 0.9806/0.9397
Epoch [8/30], Loss (train/test): 0.0609/0.1885, Acc (train/test): 0.9756/0.9225
Epoch [9/30], Loss (train/test): 0.0182/0.1333, Acc (train/test): 0.9961/0.9541
Epoch [10/30], Loss (train/test): 0.0466/0.1786, Acc (train/test): 0.9853/0.9369
Epoch [11/30], Loss (train/test): 0.0101/0.1218, Acc (train/test): 0.9982/0.9555
Epoch [12/30], Loss (train/test): 0.0051/0.1096, Acc (train/test): 0.9996/0.9613
Epoch [13/30], Loss (train/test): 0.0

### 7) LSTM

In [64]:
class LSTMClassifier(nn.Module):
    """LSTM followed by a linear layer applied to the final hidden state.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Map a batch-first sequence tensor to class scores."""
        # The LSTM returns (output, (h_n, c_n)); only the hidden state h_n is used.
        final_state = self.lstm(x)[1]
        hidden = final_state[0]
        # hidden[-1] is the last entry along the first axis of h_n.
        return self.fc(hidden[-1])

# Model hyper-parameters, in order: tangent-space feature dimension,
# LSTM hidden-state size, number of target classes.
input_size, hidden_size, num_classes = 2080, 128, 2

In [65]:
# Fresh LSTM classifier built from the hyper-parameters defined above.
lstm = LSTMClassifier(input_size, hidden_size, num_classes)

In [66]:
# CrossEntropyLoss takes raw scores and applies the softmax internally.
criterion = nn.CrossEntropyLoss() # loss includes softmax
# NOTE: this rebinds `criterion` and `optimizer`, which were used for the fully connected net.
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001)

In [67]:
# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

lstm = lstm.to(device)

In [68]:
# Loaders over the tangent-space features in batches of 64.
train_loader = get_loader(X_train, y_train, batch_size=64)
test_loader = get_loader(X_test, y_test, batch_size=64)

In [69]:
# Training loop
num_epochs = 30
lstm.train()  # ensure training mode even when this cell is re-run after the evaluation below
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Feed every tangent-space vector as a sequence of length 1: (batch, 1, features).
        inputs = inputs.unsqueeze(1)

        # Forward pass
        outputs = lstm(inputs)

        # The model already returns (batch, num_classes); squeezing here would drop the
        # batch axis whenever a batch holds a single sample and break the loss call.
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

    # Reports the loss of the last mini-batch of the epoch, not an epoch average.
    print(f'Эпоха [{epoch+1}/{num_epochs}], Потери: {loss.item():.4f}')

# Evaluation on the test loader
lstm.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        inputs = inputs.unsqueeze(1)  # same (batch, 1, features) layout as in training
        outputs = lstm(inputs)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Acc on test: {100 * accuracy:.2f}%')

Эпоха [1/30], Потери: 0.3493
Эпоха [2/30], Потери: 0.1481
Эпоха [3/30], Потери: 0.0614
Эпоха [4/30], Потери: 0.0230
Эпоха [5/30], Потери: 0.0094
Эпоха [6/30], Потери: 0.0057
Эпоха [7/30], Потери: 0.0041
Эпоха [8/30], Потери: 0.0030
Эпоха [9/30], Потери: 0.0020
Эпоха [10/30], Потери: 0.0013
Эпоха [11/30], Потери: 0.0010
Эпоха [12/30], Потери: 0.0008
Эпоха [13/30], Потери: 0.0007
Эпоха [14/30], Потери: 0.0006
Эпоха [15/30], Потери: 0.0005
Эпоха [16/30], Потери: 0.0005
Эпоха [17/30], Потери: 0.0004
Эпоха [18/30], Потери: 0.0004
Эпоха [19/30], Потери: 0.0003
Эпоха [20/30], Потери: 0.0003
Эпоха [21/30], Потери: 0.0003
Эпоха [22/30], Потери: 0.0003
Эпоха [23/30], Потери: 0.0002
Эпоха [24/30], Потери: 0.0002
Эпоха [25/30], Потери: 0.0002
Эпоха [26/30], Потери: 0.0002
Эпоха [27/30], Потери: 0.0002
Эпоха [28/30], Потери: 0.0002
Эпоха [29/30], Потери: 0.0001
Эпоха [30/30], Потери: 0.0001
Acc on test: 95.70%


----

In [None]:
from models import EEGNet
from fitting import train

# Choose the device for EEGNet: CUDA when available, CPU otherwise.
target_device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = EEGNet().to(target_device)