## Demo of the effectiveness of the training pipeline


In [1]:
# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Select matplotlib's Qt5 backend (figures open in separate windows).
# NOTE(review): this needs a Qt binding and a display; nothing in the visible
# cells plots, so confirm the backend is still required.
%matplotlib qt5

In [2]:
import warnings, copy, os
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import mne
import joblib

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

import skorch
from skorch.classifier import NeuralNetClassifier
from skorch.helper import predefined_split
from skorch.callbacks import (LRScheduler, EpochScoring, Checkpoint, Callback,
                              TrainEndCheckpoint, LoadInitState, EarlyStopping)

from sklearn.metrics import confusion_matrix

from brainda.datasets import BNCI2014001

from brainda.paradigms import MotorImagery
from brainda.algorithms.utils.model_selection import (
    set_random_seeds, generate_kfold_indices, match_kfold_indices)
from brainda.algorithms.deep_learning import EEGNet, ShallowNet

from braindecode.models import ShallowFBCSPNet, TIDNet, EEGNetv4
from braindecode import EEGClassifier

from utils import *

In [3]:
# Pick the first CUDA device when one is available, otherwise fall back to the CPU.
device_id = 0
device = torch.device("cuda:{:d}".format(device_id) if torch.cuda.is_available() else "cpu")
# Compare on `device.type`: a `torch.device` does not compare equal to a plain
# string, so `device != 'cpu'` was always True and switched cuDNN autotuning on
# even for CPU-only runs.
if device.type != 'cpu':
    torch.backends.cudnn.benchmark = True
print("Available GPU devices: {}".format(torch.cuda.device_count()))
print("Current pytorch device: {}".format(device))

Available GPU devices: 1
Current pytorch device: cuda:0


Raw data without any preprocessing steps

- epoch: [-0.5, 4]
- srate: 250Hz
- channels: 22
- classes: left hand, right hand, feet, and tongue
- current subject: 3

Note: In our settings, `session_0` is `session_E` and `session_1` is `session_T`.

In [4]:
def raw_hook(raw, caches, verbose=False):
    """Pass-through raw hook: hand back `raw` and `caches` untouched.

    Registered on the paradigm so that no preprocessing is applied to the
    raw recordings. `verbose` is accepted but not used.

    Returns:
        tuple: `(raw, caches)`, the very same objects that were passed in.
    """
    untouched = (raw, caches)
    return untouched

srate = 250
dataset = BNCI2014001()
selected_channels = [
    'FZ', 'FC3', 'FC1', 'FCZ', 'FC2', 'FC4',
    'C5', 'C3', 'C1', 'CZ', 'C2', 'C4', 'C6',
    'CP3', 'CP1', 'CPZ', 'CP2', 'CP4',
    'P1', 'PZ', 'P2', 'POZ']
events = ['left_hand', 'right_hand', 'feet', 'tongue']

# `dataset.events[name]` is used here as (event id, (start, end)): index [0] is
# the id passed to `label_encoder`, index [1] the trial window the epoch is
# compared against. The epoch starts 0.5 s before the `left_hand` window start.
start_t = dataset.events['left_hand'][1][0] - 0.5
duration = 4.5 # seconds
# One interval is handed to the paradigm for four events, so make sure the
# epoch fits inside the window of every requested event, not only `left_hand`.
if any(start_t + duration > dataset.events[e][1][1] for e in events):
    print("Warning: the current dataset available trial duration is not long enough.")
paradigm = MotorImagery(
    channels=selected_channels,
    srate=srate,
    intervals=[(start_t, start_t + duration)],
    events=events)

event_id = [dataset.events[e][0] for e in events]
# `raw_hook` returns its inputs unchanged, i.e. the raw data is not preprocessed.
paradigm.register_raw_hook(raw_hook)
X, y, meta = paradigm.get_data(dataset, subjects=[3], return_concat=True, verbose=False)
# NOTE(review): `label_encoder` comes from the star-import of `utils`;
# presumably it maps the event ids to consecutive class indices — confirm.
y = label_encoder(y, event_id)

### Braindecode training pipeline

Adapted from https://github.com/braindecode/braindecode/blob/master/examples/plot_bcic_iv_2a_moabb_trial.py

Neither bandpass filtering nor exponential moving standardization is applied.

In [5]:
# Run configuration: plain constants and class references only.
batch_size = 64
lr = 0.0625 * 0.01
max_epochs = 100
criterion = nn.NLLLoss
optimizer = optim.AdamW
ckp_dirname = 'runs_training_pipeline'
verbose = True

# Seed the RNGs before the model is constructed.
set_random_seeds(20200220)
n_trials, n_channels, n_samples = X.shape
labels = np.unique(y)
n_classes = len(labels)

model = ShallowFBCSPNet(
    n_channels,
    n_classes,
    input_window_samples=n_samples,
    final_conv_length='auto',
)
# Snapshot of the freshly initialised weights; restored right before training.
initial_state = copy.deepcopy(model.state_dict())

# NOTE(review): `meta.index` labels are used as positions into X / y —
# assumes `meta` carries a default 0..n-1 index; confirm.
session_sort_keys = ['session', 'run', 'trial_id']

# train set, session_T
ind = meta[meta['session'] == 'session_1'].sort_values(by=session_sort_keys).index
trainX = np.copy(X[ind])
trainY = np.copy(y[ind])
# test set, session_E
ind = meta[meta['session'] == 'session_0'].sort_values(by=session_sort_keys).index
validX = np.copy(X[ind])
validY = np.copy(y[ind])

trainX, validX = generate_tensors(trainX, validX, dtype=torch.float)
trainY, validY = generate_tensors(trainY, validY, dtype=torch.long)

torch.cuda.empty_cache()

model.load_state_dict(copy.deepcopy(initial_state))
# The session_E data is the fixed validation split evaluated after every epoch.
valid_dataset = skorch.dataset.Dataset(validX, validY)
train_split = predefined_split(valid_dataset)

cosine_lr = LRScheduler('CosineAnnealingLR', T_max=max_epochs - 1)
net_callbacks = [
    "accuracy",
    ('lr_scheduler', cosine_lr),
]
net = EEGClassifier(
    model,
    criterion=criterion,
    optimizer=optimizer,
    optimizer__lr=lr,
    optimizer__weight_decay=0,
    batch_size=batch_size,
    device=device,
    train_split=train_split,
    iterator_train__shuffle=True,
    callbacks=net_callbacks,
    verbose=verbose,
)
net = net.fit(trainX, y=trainY, epochs=max_epochs)

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.3854[0m        [32m1.5341[0m            [35m0.3194[0m        [31m1.7365[0m  0.0006  0.6304
      2            [36m0.4306[0m        [32m1.2407[0m            [35m0.3611[0m        [31m1.6086[0m  0.0006  0.1703
      3            [36m0.5660[0m        [32m1.0748[0m            [35m0.3958[0m        [31m1.4128[0m  0.0006  0.1678
      4            [36m0.7118[0m        [32m0.9865[0m            [35m0.4444[0m        [31m1.2122[0m  0.0006  0.1665
      5            [36m0.7743[0m        [32m0.8446[0m            [35m0.4861[0m        [31m1.1604[0m  0.0006  0.1682
      6            [36m0.8160[0m        [32m0.7972[0m            [35m0.4965[0m        [31m1.1211[0m  0.0006  0.1670
      7            [36m0.8542[0m        [32m0.6772[0m            [35m0.5347

### Our training pipeline with ShallowFBCSPNet

The difference from the code above is that we use `NeuralNetClassifierNoLog`, which removes the log operation applied by `NeuralNetClassifier`, since `ShallowFBCSPNet` already applies `LogSoftmax` itself.

Our training pipeline achieves similar training results.

In [6]:
class NeuralNetClassifierNoLog(NeuralNetClassifier):
    """`NeuralNetClassifier` whose loss bypasses the classifier's own `get_loss`.

    Used for modules that already emit log-probabilities (ShallowFBCSPNet
    applies `LogSoftmax` itself), so the log operation done by
    `NeuralNetClassifier` must not be applied a second time.
    """

    def get_loss(self, y_pred, y_true, *args, **kwargs):
        """Return the loss from the class that follows `NeuralNetClassifier` in the MRO."""
        # The two-argument super() deliberately starts the lookup *after*
        # NeuralNetClassifier, skipping that class's get_loss override.
        beyond_classifier = super(NeuralNetClassifier, self)
        return beyond_classifier.get_loss(y_pred, y_true, *args, **kwargs)

# Run configuration: plain constants and class references only.
batch_size = 64
lr = 0.0625 * 0.01
max_epochs = 100
criterion = nn.NLLLoss
optimizer = optim.AdamW
ckp_dirname = 'runs_training_pipeline'
verbose = True

# Seed the RNGs before the model is constructed.
set_random_seeds(20200220)
n_trials, n_channels, n_samples = X.shape
labels = np.unique(y)
n_classes = len(labels)

model = ShallowFBCSPNet(
    n_channels,
    n_classes,
    input_window_samples=n_samples,
    final_conv_length='auto',
)
# Snapshot of the freshly initialised weights; restored right before training.
initial_state = copy.deepcopy(model.state_dict())

# NOTE(review): `meta.index` labels are used as positions into X / y —
# assumes `meta` carries a default 0..n-1 index; confirm.
session_sort_keys = ['session', 'run', 'trial_id']

# train set, session_T
ind = meta[meta['session'] == 'session_1'].sort_values(by=session_sort_keys).index
trainX = np.copy(X[ind])
trainY = np.copy(y[ind])
# test set, session_E
ind = meta[meta['session'] == 'session_0'].sort_values(by=session_sort_keys).index
validX = np.copy(X[ind])
validY = np.copy(y[ind])

trainX, validX = generate_tensors(trainX, validX, dtype=torch.float)
trainY, validY = generate_tensors(trainY, validY, dtype=torch.long)

torch.cuda.empty_cache()
# Both checkpoint callbacks write into the same directory.
ckp = Checkpoint(dirname=ckp_dirname)
train_end_ckp = TrainEndCheckpoint(dirname=ckp_dirname)

model.load_state_dict(copy.deepcopy(initial_state))
# The session_E data is the fixed validation split evaluated after every epoch.
valid_dataset = skorch.dataset.Dataset(validX, validY)
train_split = predefined_split(valid_dataset)

train_acc_scoring = EpochScoring(
    'accuracy',
    name='train_acc',
    on_train=True,
    lower_is_better=False,
)
cosine_lr = LRScheduler('CosineAnnealingLR', T_max=max_epochs - 1)
net_callbacks = [
    ('train_acc', train_acc_scoring),
    ('lr_scheduler', cosine_lr),
    ckp,
    train_end_ckp,
]
net = NeuralNetClassifierNoLog(
    model,
    criterion=criterion,
    optimizer=optimizer,
    optimizer__lr=lr,
    optimizer__weight_decay=0,
    batch_size=batch_size,
    device=device,
    train_split=train_split,
    iterator_train__shuffle=True,
    callbacks=net_callbacks,
    verbose=verbose,
)
net = net.fit(trainX, y=trainY, epochs=max_epochs)
# Restore the parameters stored by `ckp` (epochs marked `+` in the `cp` log column).
net.load_params(checkpoint=ckp)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      lr     dur
-------  -----------  ------------  -----------  ------------  ----  ------  ------
      1       [36m0.2917[0m        [32m1.5341[0m       [35m0.3194[0m        [31m1.7365[0m     +  0.0006  0.2156
      2       [36m0.4340[0m        [32m1.2407[0m       [35m0.3611[0m        [31m1.6086[0m     +  0.0006  0.1708
      3       [36m0.4931[0m        [32m1.0748[0m       [35m0.3958[0m        [31m1.4128[0m     +  0.0006  0.1671
      4       [36m0.5729[0m        [32m0.9865[0m       [35m0.4444[0m        [31m1.2122[0m     +  0.0006  0.1675
      5       [36m0.6424[0m        [32m0.8446[0m       [35m0.4861[0m        [31m1.1604[0m     +  0.0006  0.1665
      6       [36m0.7083[0m        [32m0.7972[0m       [35m0.4965[0m        [31m1.1211[0m     +  0.0006  0.1682
      7       [36m0.7431[0m        [32m0.6772[0m       [35m0.5347[0m        [31m1.0755[0m     +  0.0006 

### Our training pipeline with ShallowNet

Here we use the same training pipeline with our self-implemented `ShallowNet`, which is modified from `ShallowFBCSPNet` by replacing `conv_classifier` with `fc_layer` and removing `LogSoftmax`.

The results are similar, with a small improvement.

In [7]:
# Run configuration: plain constants and class references only.
batch_size = 64
lr = 0.0625 * 0.01
max_epochs = 100
# ShallowNet has no LogSoftmax layer, hence CrossEntropyLoss instead of NLLLoss.
criterion = nn.CrossEntropyLoss
optimizer = optim.AdamW
ckp_dirname = 'runs_training_pipeline'
verbose = True

# Seed the RNGs before the model is constructed.
set_random_seeds(20200220)
n_trials, n_channels, n_samples = X.shape
labels = np.unique(y)
n_classes = len(labels)

model = ShallowNet(n_channels, n_samples, n_classes)
# Snapshot of the freshly initialised weights; restored right before training.
initial_state = copy.deepcopy(model.state_dict())

# NOTE(review): `meta.index` labels are used as positions into X / y —
# assumes `meta` carries a default 0..n-1 index; confirm.
session_sort_keys = ['session', 'run', 'trial_id']

# train set, session_T
ind = meta[meta['session'] == 'session_1'].sort_values(by=session_sort_keys).index
trainX = np.copy(X[ind])
trainY = np.copy(y[ind])
# test set, session_E
ind = meta[meta['session'] == 'session_0'].sort_values(by=session_sort_keys).index
validX = np.copy(X[ind])
validY = np.copy(y[ind])

trainX, validX = generate_tensors(trainX, validX, dtype=torch.float)
trainY, validY = generate_tensors(trainY, validY, dtype=torch.long)

torch.cuda.empty_cache()
# Both checkpoint callbacks write into the same directory.
ckp = Checkpoint(dirname=ckp_dirname)
train_end_ckp = TrainEndCheckpoint(dirname=ckp_dirname)

model.load_state_dict(copy.deepcopy(initial_state))
# Inputs are wrapped in a dict keyed 'X' for both the validation and training data.
valid_dataset = skorch.dataset.Dataset({'X': validX}, validY)
train_split = predefined_split(valid_dataset)

train_acc_scoring = EpochScoring(
    'accuracy',
    name='train_acc',
    on_train=True,
    lower_is_better=False,
)
cosine_lr = LRScheduler('CosineAnnealingLR', T_max=max_epochs - 1)
net_callbacks = [
    ('train_acc', train_acc_scoring),
    ('lr_scheduler', cosine_lr),
    ckp,
    train_end_ckp,
]
net = NeuralNetClassifierNoLog(
    model,
    criterion=criterion,
    optimizer=optimizer,
    optimizer__lr=lr,
    optimizer__weight_decay=0,
    batch_size=batch_size,
    device=device,
    train_split=train_split,
    iterator_train__shuffle=True,
    callbacks=net_callbacks,
    verbose=verbose,
)
net = net.fit({'X': trainX}, y=trainY, epochs=max_epochs)
# Restore the parameters stored by `ckp` (epochs marked `+` in the `cp` log column).
net.load_params(checkpoint=ckp)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      lr     dur
-------  -----------  ------------  -----------  ------------  ----  ------  ------
      1       [36m0.2222[0m        [32m1.6694[0m       [35m0.2951[0m        [31m1.7453[0m     +  0.0006  0.6310
      2       [36m0.4306[0m        [32m1.2924[0m       [35m0.3403[0m        [31m1.5502[0m     +  0.0006  0.2201
      3       [36m0.5139[0m        [32m1.1181[0m       [35m0.3819[0m        [31m1.4406[0m     +  0.0006  0.2139
      4       [36m0.5486[0m        [32m1.0909[0m       [35m0.4549[0m        [31m1.3102[0m     +  0.0006  0.2147
      5       [36m0.6042[0m        [32m0.9650[0m       [35m0.4653[0m        [31m1.1612[0m     +  0.0006  0.2130
      6       [36m0.6632[0m        [32m0.8896[0m       [35m0.5243[0m        [31m1.1059[0m     +  0.0006  0.2147
      7       [36m0.7326[0m        [32m0.7701[0m       [35m0.5312[0m        [31m1.0827[0m     +  0.0006 

### Our training pipeline with EEGNetv4

In [8]:
class NeuralNetClassifierNoLog(NeuralNetClassifier):
    """`NeuralNetClassifier` whose loss bypasses the classifier's own `get_loss`.

    Repeats the definition from the ShallowFBCSPNet cell; running this cell
    simply rebinds the name. Intended for modules trained with `NLLLoss`
    whose output must not have a log applied again by `NeuralNetClassifier`.
    """

    def get_loss(self, y_pred, y_true, *args, **kwargs):
        """Return the loss from the class that follows `NeuralNetClassifier` in the MRO."""
        # The two-argument super() deliberately starts the lookup *after*
        # NeuralNetClassifier, skipping that class's get_loss override.
        beyond_classifier = super(NeuralNetClassifier, self)
        return beyond_classifier.get_loss(y_pred, y_true, *args, **kwargs)

# Run configuration: plain constants and class references only.
batch_size = 64
lr = 0.01
max_epochs = 100
criterion = nn.NLLLoss
optimizer = optim.AdamW
ckp_dirname = 'runs_training_pipeline'
verbose = True

# Seed the RNGs before the model is constructed.
set_random_seeds(20200220)
n_trials, n_channels, n_samples = X.shape
labels = np.unique(y)
n_classes = len(labels)

model = EEGNetv4(
    n_channels,
    n_classes,
    input_window_samples=n_samples,
    F1=8,
    D=2,
    F2=16,
    kernel_length=64,
    drop_prob=0.5,
)
# Snapshot of the freshly initialised weights; restored right before training.
initial_state = copy.deepcopy(model.state_dict())

# NOTE(review): `meta.index` labels are used as positions into X / y —
# assumes `meta` carries a default 0..n-1 index; confirm.
session_sort_keys = ['session', 'run', 'trial_id']

# train set, session_T
ind = meta[meta['session'] == 'session_1'].sort_values(by=session_sort_keys).index
trainX = np.copy(X[ind])
trainY = np.copy(y[ind])
# test set, session_E
ind = meta[meta['session'] == 'session_0'].sort_values(by=session_sort_keys).index
validX = np.copy(X[ind])
validY = np.copy(y[ind])

trainX, validX = generate_tensors(trainX, validX, dtype=torch.float)
trainY, validY = generate_tensors(trainY, validY, dtype=torch.long)

torch.cuda.empty_cache()
# Both checkpoint callbacks write into the same directory.
ckp = Checkpoint(dirname=ckp_dirname)
train_end_ckp = TrainEndCheckpoint(dirname=ckp_dirname)

model.load_state_dict(copy.deepcopy(initial_state))
# The session_E data is the fixed validation split evaluated after every epoch.
valid_dataset = skorch.dataset.Dataset(validX, validY)
train_split = predefined_split(valid_dataset)

train_acc_scoring = EpochScoring(
    'accuracy',
    name='train_acc',
    on_train=True,
    lower_is_better=False,
)
cosine_lr = LRScheduler('CosineAnnealingLR', T_max=max_epochs - 1)
net_callbacks = [
    ('train_acc', train_acc_scoring),
    ('lr_scheduler', cosine_lr),
    ckp,
    train_end_ckp,
]
net = NeuralNetClassifierNoLog(
    model,
    criterion=criterion,
    optimizer=optimizer,
    optimizer__lr=lr,
    optimizer__weight_decay=0,
    batch_size=batch_size,
    device=device,
    train_split=train_split,
    iterator_train__shuffle=True,
    callbacks=net_callbacks,
    verbose=verbose,
)
net = net.fit(trainX, y=trainY, epochs=max_epochs)
# Restore the parameters stored by `ckp` (epochs marked `+` in the `cp` log column).
net.load_params(checkpoint=ckp)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      lr     dur
-------  -----------  ------------  -----------  ------------  ----  ------  ------
      1       [36m0.2917[0m        [32m1.5321[0m       [35m0.3681[0m        [31m1.3416[0m     +  0.0100  0.3494
      2       [36m0.4132[0m        [32m1.2833[0m       [35m0.3889[0m        1.3704        0.0100  0.2431
      3       [36m0.4757[0m        [32m1.1720[0m       [35m0.4861[0m        1.3713        0.0100  0.2401
      4       [36m0.5486[0m        [32m1.0289[0m       0.4583        1.5700        0.0100  0.2401
      5       [36m0.6319[0m        [32m0.8532[0m       0.4132        1.8973        0.0100  0.2407
      6       [36m0.6701[0m        [32m0.8333[0m       0.4167        2.0515        0.0099  0.2400
      7       [36m0.7188[0m        [32m0.7220[0m       0.4583        1.7437        0.0099  0.2409
      8       [36m0.7639[0m        [32m0.6607[0m       [35m0.4965[0m        

### Our training pipeline with EEGNet

In [9]:
# Run configuration: plain constants and class references only.
batch_size = 64
lr = 0.01
max_epochs = 100
criterion = nn.CrossEntropyLoss
optimizer = optim.AdamW
ckp_dirname = 'runs_training_pipeline'
verbose = True

# Seed the RNGs before the model is constructed.
set_random_seeds(20200220)
n_trials, n_channels, n_samples = X.shape
labels = np.unique(y)
n_classes = len(labels)

model = EEGNet(
    n_channels,
    n_samples,
    n_classes,
    time_kernel=(8, (1, 64), (1, 1)),
    D=2,
    pool_kernel1=((1, 4), (1, 4)),
    separa_kernel=(16, (1, 16), (1, 1)),
    pool_kernel2=((1, 8), (1, 8)),
    depthwise_norm_rate=1,
    fc_norm_rate=0.25,
    dropout_rate=0.5,
)
# Snapshot of the freshly initialised weights; restored right before training.
initial_state = copy.deepcopy(model.state_dict())

# NOTE(review): `meta.index` labels are used as positions into X / y —
# assumes `meta` carries a default 0..n-1 index; confirm.
session_sort_keys = ['session', 'run', 'trial_id']

# train set, session_T
ind = meta[meta['session'] == 'session_1'].sort_values(by=session_sort_keys).index
trainX = np.copy(X[ind])
trainY = np.copy(y[ind])
# test set, session_E
ind = meta[meta['session'] == 'session_0'].sort_values(by=session_sort_keys).index
validX = np.copy(X[ind])
validY = np.copy(y[ind])

trainX, validX = generate_tensors(trainX, validX, dtype=torch.float)
trainY, validY = generate_tensors(trainY, validY, dtype=torch.long)

torch.cuda.empty_cache()
# Both checkpoint callbacks write into the same directory.
ckp = Checkpoint(dirname=ckp_dirname)
train_end_ckp = TrainEndCheckpoint(dirname=ckp_dirname)

model.load_state_dict(copy.deepcopy(initial_state))
# The session_E data is the fixed validation split evaluated after every epoch.
valid_dataset = skorch.dataset.Dataset(validX, validY)
train_split = predefined_split(valid_dataset)

train_acc_scoring = EpochScoring(
    'accuracy',
    name='train_acc',
    on_train=True,
    lower_is_better=False,
)
cosine_lr = LRScheduler('CosineAnnealingLR', T_max=max_epochs - 1)
net_callbacks = [
    ('train_acc', train_acc_scoring),
    ('lr_scheduler', cosine_lr),
    ckp,
    train_end_ckp,
]
# This cell uses the stock skorch classifier rather than the NoLog subclass.
net = NeuralNetClassifier(
    model,
    criterion=criterion,
    optimizer=optimizer,
    optimizer__lr=lr,
    optimizer__weight_decay=0,
    batch_size=batch_size,
    device=device,
    train_split=train_split,
    iterator_train__shuffle=True,
    callbacks=net_callbacks,
    verbose=verbose,
)
net = net.fit(trainX, y=trainY, epochs=max_epochs)
# Restore the parameters stored by `ckp` (epochs marked `+` in the `cp` log column).
net.load_params(checkpoint=ckp)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      lr     dur
-------  -----------  ------------  -----------  ------------  ----  ------  ------
      1       [36m0.2396[0m        [32m1.5590[0m       [35m0.3333[0m        [31m1.3508[0m     +  0.0100  0.1676
      2       [36m0.3299[0m        [32m1.4080[0m       [35m0.3576[0m        [31m1.3181[0m     +  0.0100  0.1197
      3       [36m0.4167[0m        [32m1.2456[0m       [35m0.4097[0m        [31m1.2868[0m     +  0.0100  0.1186
      4       [36m0.4479[0m        [32m1.1780[0m       [35m0.4271[0m        [31m1.2497[0m     +  0.0100  0.1172
      5       [36m0.5486[0m        [32m1.1128[0m       [35m0.4688[0m        [31m1.2172[0m     +  0.0100  0.1175
      6       [36m0.5694[0m        [32m1.0116[0m       [35m0.4826[0m        [31m1.2103[0m     +  0.0099  0.1182
      7       [36m0.6076[0m        [32m0.9260[0m       [35m0.5035[0m        [31m1.1957[0m     +  0.0099 

In [10]:
class NeuralNetClassifierNoLog(NeuralNetClassifier):
    """`NeuralNetClassifier` whose loss bypasses the classifier's own `get_loss`.

    Repeats the definition from the earlier cells; running this cell simply
    rebinds the name. Intended for modules trained with `NLLLoss` whose
    output must not have a log applied again by `NeuralNetClassifier`.
    """

    def get_loss(self, y_pred, y_true, *args, **kwargs):
        """Return the loss from the class that follows `NeuralNetClassifier` in the MRO."""
        # The two-argument super() deliberately starts the lookup *after*
        # NeuralNetClassifier, skipping that class's get_loss override.
        beyond_classifier = super(NeuralNetClassifier, self)
        return beyond_classifier.get_loss(y_pred, y_true, *args, **kwargs)

# Run configuration: plain constants and class references only.
batch_size = 64
lr = 0.001
max_epochs = 100
criterion = nn.NLLLoss
optimizer = optim.AdamW
ckp_dirname = 'runs_training_pipeline'
verbose = True

# Seed the RNGs before the model is constructed.
set_random_seeds(20200220)
n_trials, n_channels, n_samples = X.shape
labels = np.unique(y)
n_classes = len(labels)

model = TIDNet(
    n_channels,
    n_classes,
    input_window_samples=n_samples,
    s_growth=24,
    t_filters=32,
    temp_layers=2,
    spat_layers=2,
    pooling=15,
    temp_span=0.05,
    bottleneck=3,
)
# Snapshot of the freshly initialised weights; restored right before training.
initial_state = copy.deepcopy(model.state_dict())

# NOTE(review): `meta.index` labels are used as positions into X / y —
# assumes `meta` carries a default 0..n-1 index; confirm.
session_sort_keys = ['session', 'run', 'trial_id']

# train set, session_T
ind = meta[meta['session'] == 'session_1'].sort_values(by=session_sort_keys).index
trainX = np.copy(X[ind])
trainY = np.copy(y[ind])
# test set, session_E
ind = meta[meta['session'] == 'session_0'].sort_values(by=session_sort_keys).index
validX = np.copy(X[ind])
validY = np.copy(y[ind])

trainX, validX = generate_tensors(trainX, validX, dtype=torch.float)
trainY, validY = generate_tensors(trainY, validY, dtype=torch.long)

torch.cuda.empty_cache()
# Both checkpoint callbacks write into the same directory.
ckp = Checkpoint(dirname=ckp_dirname)
train_end_ckp = TrainEndCheckpoint(dirname=ckp_dirname)

model.load_state_dict(copy.deepcopy(initial_state))
# The session_E data is the fixed validation split evaluated after every epoch.
valid_dataset = skorch.dataset.Dataset(validX, validY)
train_split = predefined_split(valid_dataset)

train_acc_scoring = EpochScoring(
    'accuracy',
    name='train_acc',
    on_train=True,
    lower_is_better=False,
)
cosine_lr = LRScheduler('CosineAnnealingLR', T_max=max_epochs - 1)
net_callbacks = [
    ('train_acc', train_acc_scoring),
    ('lr_scheduler', cosine_lr),
    ckp,
    train_end_ckp,
]
net = NeuralNetClassifierNoLog(
    model,
    criterion=criterion,
    optimizer=optimizer,
    optimizer__lr=lr,
    optimizer__weight_decay=0,
    batch_size=batch_size,
    device=device,
    train_split=train_split,
    iterator_train__shuffle=True,
    callbacks=net_callbacks,
    verbose=verbose,
)
net = net.fit(trainX, y=trainY, epochs=max_epochs)
# Restore the parameters stored by `ckp` (epochs marked `+` in the `cp` log column).
net.load_params(checkpoint=ckp)

  epoch    train_acc    train_loss    valid_acc    valid_loss    cp      lr     dur
-------  -----------  ------------  -----------  ------------  ----  ------  ------
      1       [36m0.2986[0m        [32m1.9316[0m       [35m0.3090[0m        [31m2.0845[0m     +  0.0010  3.1516
      2       [36m0.4271[0m        [32m1.5461[0m       0.2812        2.5431        0.0010  2.1912
      3       [36m0.4549[0m        [32m1.2975[0m       [35m0.3715[0m        [31m1.9438[0m     +  0.0010  2.2093
      4       [36m0.5938[0m        [32m0.9878[0m       [35m0.3889[0m        2.0292        0.0010  2.1937
      5       [36m0.6493[0m        [32m0.8703[0m       [35m0.4097[0m        [31m1.7038[0m     +  0.0010  2.2328
      6       [36m0.7083[0m        [32m0.7619[0m       [35m0.4236[0m        1.7468        0.0010  2.2357
      7       [36m0.7535[0m        [32m0.5907[0m       0.3993        1.8018        0.0010  2.2300
      8       [36m0.7917[0m        [32m0.55