### Clinical BCI Challenge-WCCI2020
- [website link](https://sites.google.com/view/bci-comp-wcci/?fbclid=IwAR37WLQ_xNd5qsZvktZCT8XJerHhmVb_bU5HDu69CnO85DE3iF0fs57vQ6M)


 - [Dataset Link](https://github.com/5anirban9/Clinical-Brain-Computer-Interfaces-Challenge-WCCI-2020-Glasgow)
 
 
 - [Braindecode Tutorial](https://braindecode.org/auto_examples/plot_bcic_iv_2a_moabb_trial.html)

In [1]:
import braindecode

In [2]:
import mne
from scipy.io import loadmat
import scipy
import sklearn
import numpy as np
import pandas as pd
import glob
from mne.decoding import CSP
import os

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda

In [4]:
import warnings
warnings.filterwarnings('ignore') # to ignore warnings

In [5]:
verbose = False                    # global variable to suppress output display of MNE functions
mne.set_log_level(verbose=verbose) # to suppress large info outputs

In [6]:
verbose_clf = False # control output of FBCSP function
freqs_band = np.linspace(8, 32, 7) # filter bank choice
cv = 10
train_ratio = 0.75 # 75:25 for trian-valid split

In [7]:
n_jobs = None  # for multicore parallel processing, set it to 1 if cause memory issues, for full utilization set to -1

## Data Loading and Conversion to MNE Datatypes
[Mike Cohen Tutorials link for EEG Preprocessing](https://www.youtube.com/watch?v=uWB5tjhataY&list=PLn0OLiymPak2gDD-VDA90w9_iGDgOOb2o)

In [8]:
current_folder = globals()['_dh'][0]  # a hack to get path of current folder in which jupyter file is located
data_path = os.path.join(current_folder, 'Data')

In [9]:
# although we have evaluation files but their labels aren't opensource so we won't use them here
all_files        = glob.glob(data_path + '/*.mat')
training_files   = glob.glob(data_path + '/*T.mat')
evaluation_files = glob.glob(data_path + '/*E.mat')
len(all_files), len(training_files), len(evaluation_files)     # if these return zero,then no file is loaded

(18, 8, 10)

In [10]:
# we have modified the labels values from [1, 2] to [0, 1] as pytorch 
# expects labels/classes to be in [0, n_classes-1] format
def get_mne_epochs(filepath, verbose=verbose, t_start=2, fs=512, mode='train'):
    '''
    This function reads the EEG data from .mat file and convert it to MNE-Python Compatible epochs
    data structure. It takes data from [0, 8] sec range and return it by setting t = 0 at cue onset
    i.e. 3 seconds and dropping first two seconds so the output data is in [-1.0, 5.0] sec range. The
    Details can be found in the preprocessing section of the attached document
    '''
    mat_data = loadmat(filepath) # read .mat file
    eeg_data= mat_data['RawEEGData']
    idx_start = fs*t_start      
    eeg_data = eeg_data[:, :, idx_start:]
    event_id = {'left-hand': 0, 'right-hand': 1} # pytorch expects labels in [0, n_classes-1]
    channel_names = ['F3', 'FC3', 'C3', 'CP3', 'P3', 'FCz', 'CPz', 'F4', 'FC4', 'C4', 'CP4', 'P4']
    info = mne.create_info(ch_names=channel_names, sfreq=fs, ch_types='eeg')
    epochs = mne.EpochsArray(eeg_data, info, verbose=verbose, tmin=t_start-3.0)
    epochs.set_montage('standard_1020')
    epochs.filter(1., None) 
    epochs.apply_baseline(baseline=(-.250, 0)) # linear baseline correction
    
    if mode == 'train': # this in only applicable for training data
        epochs.event_id = event_id 
        epochs.events[:,2] = mat_data['Labels'].ravel() - 1    
    return epochs 

def get_labels(filepath):
    mat_data = loadmat(filepath) # read .mat file
    return mat_data['Labels'].ravel() - 1

In [11]:
epochs, labels = get_mne_epochs(training_files[0], verbose=verbose), get_labels(training_files[0])
data = epochs.get_data()
print('Shape of EEG Data: ', data.shape, '\t Shape of Labels: ', labels.shape) 

Shape of EEG Data:  (80, 12, 3072) 	 Shape of Labels:  (80,)


In [12]:
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

### Training Data

In [13]:
# loading original data
epochs_list_train = []
for i in training_files:
    epochs_list_train.append(get_mne_epochs(i, verbose=verbose))

### Evaluation Data
first 8 for single subject and last 2 are for cross subject

In [14]:
epochs_list_eval = []
for i in evaluation_files:
    epochs_list_eval.append(get_mne_epochs(i, mode='test', verbose=verbose))

## Deep Learning with Braindecode 

### Preprocessing
https://braindecode.org/auto_examples/plot_mne_dataset_example.html

https://braindecode.org/auto_examples/plot_bcic_iv_2a_moabb_trial.html

Applying NumpyPreproc to mne.epochs give error related to apply_function implementation: https://github.com/braindecode/braindecode/issues/160

can modify the description attribute of datasets by manually passing a pandas dataframe/series and then split accordingly

the issue of size mismatch means we are using a different shape input as compared to the built-in models, using EEGNet somehow solves it

Target 2 is out of bounds: class labels should be [0, num_classes-1]

loading and saving skorch model https://skorch.readthedocs.io/en/stable/user/save_load.html

In [183]:
from braindecode.datautil import create_from_mne_epochs

# convert epochs to braindecode compatible datastructure 
# 2sec windows with 0.5sec stride
window_size = 1024 #50 # 3072
window_stride = 256 # 50

windows_datasets = create_from_mne_epochs(
    [epochs_list_train[0]], # list of epochs
    window_size_samples = window_size,
    window_stride_samples = window_stride,
    drop_last_window = False
)

In [184]:
train_set = windows_datasets

In [185]:
train_set.datasets[50].y

array([1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [186]:
labels = []
for i in range(len(train_set.datasets)):
    labels.extend(train_set.datasets[i].y)
labels = np.array(labels) # subtracting 1 as pytorch nll expects labels to be in [0, n_classes-1]
train_set.description = pd.DataFrame(data=labels, columns=['labels'])

In [187]:
sum(np.array(labels)==0), sum(np.array(labels)==1)

(360, 360)

In [131]:
# train_set.description = pd.DataFrame(data=labels, columns=['labels'])
# train_set.description['session'] = None # fill that up later with train/test 
# df = train_set.description
# split_idx = int(len(df)*0.75) # hold-out CV with 75:25 split 
# df['session'][:split_idx], df['session'][split_idx:] = 'train', 'test'

In [188]:
train_set.description

Unnamed: 0,labels
0,0
1,0
2,0
3,0
4,0
...,...
715,0
716,0
717,0
718,0


In [189]:
windows_datasets.datasets[0].windows.get_data()[0,0,:5]

array([-31.29383482,  87.77848413, 134.99000647,  60.95198251,
       -50.06313685])

In [190]:
from braindecode.datautil.preprocess import exponential_moving_standardize
from braindecode.datautil.preprocess import MNEPreproc, NumpyPreproc, preprocess

low_cut_hz = 7.  # low cut frequency for filtering
high_cut_hz = 32.  # high cut frequency for filtering
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

# using NumpyPreproc with exponential_moving_standardize gave error so 
# I passed a custom callable to MnePreproc
def custom_exp_moving_std_fn(epochs, factor_new=factor_new, init_block_size=init_block_size):
    data = epochs.get_data()
    for i in range(len(data)):
        epochs._data[i] = exponential_moving_standardize(data[i], 
                        factor_new=factor_new, init_block_size=init_block_size)
    return epochs

preprocessors = [
    # keep only EEG sensors
    MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
    # convert from volt to microvolt, directly modifying the numpy array, 
    # I think my data is already in microvolts
    # NumpyPreproc(fn=lambda x: x * 1e6),
    # bandpass filter
    MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
    # exponential moving standardization
    MNEPreproc(fn=custom_exp_moving_std_fn, factor_new=factor_new,
        init_block_size=init_block_size)
]

In [191]:
preprocess(windows_datasets, preprocessors)
windows_datasets.datasets[0].windows.get_data()[0,0,:5]

array([1.23328384, 2.57082541, 3.72159681, 4.59938505, 5.14926365])

In [195]:
# Creating a model
import torch
from braindecode.util import set_random_seeds
from braindecode.models import ShallowFBCSPNet, EEGNetv4

cuda = torch.cuda.is_available()  # check if GPU is available, if True chooses to use it
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = True
seed = 20200220  # random seed to make results reproducible
# Set random seed to be able to reproduce results
set_random_seeds(seed=seed, cuda=cuda)

n_classes=2
# Extract number of chans and time steps from dataset
n_chans = train_set[0][0].shape[0]
input_window_samples = train_set[0][0].shape[1]

model = EEGNetv4(
    n_chans,
    n_classes,
    input_window_samples = window_size, #input_window_samples,
    final_conv_length='auto',
)

# Send model to GPU
if cuda:
    model.cuda()

In [206]:
# Training time
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from braindecode import EEGClassifier

lr = 1 * 0.05 
weight_decay = 0.5 * 0.001

batch_size = 8 #64
n_epochs = 25

clf = EEGClassifier(
    model,
    criterion=torch.nn.NLLLoss,
    optimizer=torch.optim.AdamW,
    #train_split=predefined_split(train_set),  # using valid_set for validation
    optimizer__lr=lr,
    optimizer__weight_decay=weight_decay,
    batch_size=batch_size,
    callbacks=[
        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
    ],
    device=device,
)

In [161]:
clf

<class 'braindecode.classifier.EEGClassifier'>[initialized](
  module_=EEGNetv4(
    (ensuredims): Ensure4d()
    (dimshuffle): Expression(expression=_transpose_to_b_1_c_0) 
    (conv_temporal): Conv2d(1, 8, kernel_size=(1, 64), stride=(1, 1), padding=(0, 32), bias=False)
    (bnorm_temporal): BatchNorm2d(8, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (conv_spatial): Conv2dWithConstraint(8, 16, kernel_size=(12, 1), stride=(1, 1), groups=8, bias=False)
    (bnorm_1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
    (elu_1): Expression(expression=elu) 
    (pool_1): AvgPool2d(kernel_size=(1, 4), stride=(1, 4), padding=0)
    (drop_1): Dropout(p=0.25, inplace=False)
    (conv_separable_depth): Conv2d(16, 16, kernel_size=(1, 16), stride=(1, 1), padding=(0, 8), groups=16, bias=False)
    (conv_separable_point): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bnorm_2): BatchNorm2d(16, eps=0.001, momentum=0.01, af

In [207]:
clf.fit(train_set, y=train_set.description.labels, epochs=n_epochs);

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.5035[0m        [32m1.2641[0m            [35m0.5000[0m        [31m8.6870[0m  0.0500  4.6160
      2            [36m0.7674[0m        [32m0.7391[0m            [35m0.5486[0m        [31m3.0782[0m  0.0498  5.5880
      3            [36m0.8455[0m        0.8920            0.5347        [31m1.4123[0m  0.0491  4.9990
      4            0.7552        [32m0.6417[0m            0.4722        2.4552  0.0481  5.8080
      5            0.7917        0.7602            [35m0.5694[0m        [31m1.3458[0m  0.0467  5.1710
      6            0.7396        [32m0.4640[0m            0.5417        1.4634  0.0448  4.8490
      7            [36m0.9375[0m        [32m0.4465[0m            [35m0.5972[0m        [31m0.9277[0m  0.0427  4.8480
      8            0.7517        0.4938       

In [203]:
# Model training for a specified number of epochs. `y` is None as it is already supplied
# in the dataset.
clf.fit(train_set, y=train_set.description.labels, epochs=n_epochs);

Re-initializing optimizer because the following parameters were re-set: lr, weight_decay.
  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.9670[0m        [32m0.2535[0m            [35m0.8194[0m        [31m0.5020[0m  0.0100  6.0250
      2            0.7726        [32m0.2463[0m            0.6389        1.1504  0.0100  6.1590
      3            0.8733        [32m0.2108[0m            0.6458        1.1607  0.0098  5.0160
      4            [36m0.9809[0m        [32m0.1826[0m            0.8056        [31m0.4239[0m  0.0096  4.8140
      5            [36m0.9878[0m        [32m0.1551[0m            0.7847        0.6272  0.0093  4.8320
      6            [36m0.9913[0m        [32m0.1152[0m            0.7361        0.7034  0.0090  4.7670
      7            [36m1.0000[0m        0.1287            0.7778        0.5754  0.0085 

In [None]:
# saving and loading the model state
clf.save_params(f_params='model.pkl', f_optimizer='opt.pkl', f_history='history.json')
clf.initialize() # This is important!
clf.load_params(f_params='model.pkl', f_optimizer='opt.pkl', f_history='history.json')

### It's Training Time

In [306]:
from braindecode.datautil import create_from_mne_epochs

# convert epochs to braindecode compatible datastructure 
# 2sec windows with 0.250 sec stride
window_size = 1024 #50 # 3072
window_stride = 128 #256 # 50

windows_datasets_list = []
for epoch in epochs_list_train:
    windows_datasets_list.append(
            create_from_mne_epochs(
            [epoch], # list of epochs
            window_size_samples = window_size,
            window_stride_samples = window_stride,
            drop_last_window = False
        )
    )

In [314]:
def get_windows_datasets_labels(windows_dataset):
    labels = []
    for i in range(len(windows_dataset.datasets)):
        labels.extend(windows_dataset.datasets[i].y)
    return np.array(labels) 

for windows_dataset in windows_datasets_list:
    windows_dataset.description = pd.DataFrame(data=get_windows_datasets_labels(windows_dataset), 
                                           columns=['labels'])

In [322]:
print("Total Windows in a Single Dataset: ", len(windows_datasets_list[0].description))

Total Windows in a Single Dataset:  1360


In [315]:
from braindecode.datautil.preprocess import exponential_moving_standardize
from braindecode.datautil.preprocess import MNEPreproc, NumpyPreproc, preprocess

low_cut_hz = 7.  # low cut frequency for filtering
high_cut_hz = 32.  # high cut frequency for filtering
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

def custom_exp_moving_std_fn(epochs, factor_new=factor_new, init_block_size=init_block_size):
    data = epochs.get_data()
    for i in range(len(data)):
        epochs._data[i] = exponential_moving_standardize(data[i], 
                        factor_new=factor_new, init_block_size=init_block_size)
    return epochs

preprocessors = [
    # keep only EEG sensors
    MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
    # bandpass filter
    MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
    # exponential moving standardization
    MNEPreproc(fn=custom_exp_moving_std_fn, factor_new=factor_new,
        init_block_size=init_block_size)
]

In [316]:
for windows_dataset in windows_datasets_list: 
    preprocess(windows_dataset, preprocessors)

In [323]:
batch_size = 16 #64
n_epochs = 20 #25

In [None]:
# Creating a model
import torch
from braindecode.util import set_random_seeds
from braindecode.models import ShallowFBCSPNet, EEGNetv4

cuda = torch.cuda.is_available()  # check if GPU is available, if True chooses to use it
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = True
seed = 20200220  # random seed to make results reproducible
# Set random seed to be able to reproduce results
set_random_seeds(seed=seed, cuda=cuda)

n_classes=2
# Extract number of chans and time steps from dataset
n_chans = windows_datasets_list[0][0].shape[0]
input_window_samples = windows_datasets_list[0][0].shape[1]

model = EEGNetv4(
    n_chans,
    n_classes,
    input_window_samples = window_size, #input_window_samples,
    final_conv_length='auto',
)

# Send model to GPU
if cuda:
    model.cuda()

In [325]:
# Training time
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from braindecode import EEGClassifier

lr = 1 * 0.02 
weight_decay = 0.5 * 0.001

clfs_list = []
for i in range(len(epochs_list_train)):
    clfs_list.append(
        EEGClassifier(
                    model,
                    criterion=torch.nn.NLLLoss,
                    optimizer=torch.optim.AdamW,
                    #train_split=predefined_split(train_set),  # using valid_set for validation
                    optimizer__lr=lr,
                    optimizer__weight_decay=weight_decay,
                    batch_size=batch_size,
                    callbacks=[
                        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
                    ],
                    device=device,
                    )
                )

In [326]:
def training_function(subject_index=0):
    print('\n', '#'*25, 'Training for Subject:', subject_index+1, '#'*25, '\n')
    dataset = windows_datasets_list[subject_index]
    clfs_list[subject_index].fit(dataset, y=dataset.description.labels, epochs=n_epochs);
    best_validation_acc = clfs_list[subject_index].callbacks_[4][1].best_score_ # a hack to get best validation accuracy
    best_validation_kappa = (2*best_validation_acc)-1
    print("Best Cross Validation Kappa Score: {:.2f}".format(best_validation_kappa))

In [327]:
for subject in range(len(training_files)):
    training_function(subject)


 ######################### Training for Subject: 1 ######################### 

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr      dur
-------  ----------------  ------------  ----------------  ------------  ------  -------
      1            [36m0.5846[0m        [32m0.4500[0m            [35m0.5000[0m        [31m3.1665[0m  0.0200  10.7910
      2            [36m0.6471[0m        [32m0.2812[0m            0.5000        3.4927  0.0199  9.6790
      3            [36m0.7849[0m        0.3008            [35m0.5956[0m        [31m1.0324[0m  0.0195  9.9190
      4            [36m0.8355[0m        0.2879            0.5809        [31m0.9754[0m  0.0188  8.3600
      5            [36m0.9862[0m        [32m0.2319[0m            [35m0.6250[0m        1.0986  0.0179  8.6610
      6            0.7932        [32m0.2257[0m            0.5515        2.3600  0.0168  8.1550
      7            0.6406        0.2371            0.5110        2.2768  0.015

     12            0.9936        0.1382            0.5919        2.0714  0.0075  7.7050
     13            [36m0.9972[0m        [32m0.0787[0m            0.5993        2.1698  0.0060  7.7630
     14            0.9779        0.0924            0.5882        2.8224  0.0045  7.7300
     15            [36m1.0000[0m        [32m0.0744[0m            0.6471        2.0356  0.0032  7.7360
     16            1.0000        [32m0.0602[0m            0.6434        2.0673  0.0021  7.7760
     17            1.0000        0.0629            0.6360        2.3021  0.0012  8.2210
     18            1.0000        0.0846            0.6324        2.4866  0.0005  7.8140
     19            1.0000        [32m0.0531[0m            0.6324        2.4980  0.0001  7.7970
     20            1.0000        0.0786            0.6324        2.4779  0.0000  7.7300
Best Cross Validation Kappa Score: 0.49

 ######################### Training for Subject: 5 ######################### 

  epoch    train_accuracy    train

      2            [36m0.6783[0m        [32m0.5039[0m            [35m0.5882[0m        [31m0.9278[0m  0.0199  9.1400
      3            [36m0.7987[0m        [32m0.4400[0m            [35m0.6324[0m        [31m0.7870[0m  0.0195  7.7520
      4            [36m0.8392[0m        [32m0.4353[0m            [35m0.6507[0m        0.7998  0.0188  7.7070
      5            [36m0.8465[0m        [32m0.3567[0m            [35m0.6691[0m        [31m0.7436[0m  0.0179  7.7840
      6            [36m0.9357[0m        [32m0.3241[0m            [35m0.6985[0m        [31m0.7387[0m  0.0168  7.7670
      7            0.6121        0.3307            0.5551        1.6985  0.0155  7.8750
      8            [36m0.9412[0m        [32m0.2459[0m            [35m0.7500[0m        [31m0.6240[0m  0.0140  7.7620
      9            0.7941        0.2543            0.6066        1.6199  0.0125  7.7360
     10            0.8732        [32m0.2328[0m            0.6507        1.1218  0.0108 

### Results


### It's Training Time with 0.5, 4.5 sec only

In [28]:
from braindecode.datautil import create_from_mne_epochs

# convert epochs to braindecode compatible datastructure 
# 2sec windows with 0.250 sec stride
window_size = 1024 #50 # 3072
window_stride = 128 #256 # 50

windows_datasets_list = []
for epoch in epochs_list_train:
    windows_datasets_list.append(
            create_from_mne_epochs(
            [epoch.crop(tmin=0.5, tmax=4.5, include_tmax=False)], # [0.5, 4.5] s, expects list of epochs
            window_size_samples = window_size,
            window_stride_samples = window_stride,
            drop_last_window = False
        )
    )

In [30]:
def get_windows_datasets_labels(windows_dataset):
    labels = []
    for i in range(len(windows_dataset.datasets)):
        labels.extend(windows_dataset.datasets[i].y)
    return np.array(labels) 

for windows_dataset in windows_datasets_list:
    windows_dataset.description = pd.DataFrame(data=get_windows_datasets_labels(windows_dataset), 
                                           columns=['labels'])

In [31]:
print("Total Windows in a Single Dataset: ", len(windows_datasets_list[0].description))

Total Windows in a Single Dataset:  720


In [45]:
from braindecode.datautil.preprocess import exponential_moving_standardize
from braindecode.datautil.preprocess import MNEPreproc, NumpyPreproc, preprocess

low_cut_hz = 7.  # low cut frequency for filtering
high_cut_hz = 32.  # high cut frequency for filtering
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

def custom_exp_moving_std_fn(epochs, factor_new=factor_new, init_block_size=init_block_size):
    data = epochs.get_data()
    for i in range(len(data)):
        epochs._data[i] = exponential_moving_standardize(data[i], 
                        factor_new=factor_new, init_block_size=init_block_size)
    return epochs

preprocessors = [
    # keep only EEG sensors
    MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
    # bandpass filter
    MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
    # exponential moving standardization
    MNEPreproc(fn=custom_exp_moving_std_fn, factor_new=factor_new,
        init_block_size=init_block_size)
]

In [33]:
for windows_dataset in windows_datasets_list: 
    preprocess(windows_dataset, preprocessors)

In [34]:
batch_size = 16 #64
n_epochs = 20 #25

In [37]:
# Creating a model
import torch
from braindecode.util import set_random_seeds
from braindecode.models import ShallowFBCSPNet, EEGNetv4

cuda = torch.cuda.is_available()  # check if GPU is available, if True chooses to use it
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = True
seed = 20200220  # random seed to make results reproducible
# Set random seed to be able to reproduce results
set_random_seeds(seed=seed, cuda=cuda)

n_classes=2
# Extract number of chans and time steps from dataset
n_chans = windows_datasets_list[0][0][0].shape[0]
input_window_samples = windows_datasets_list[0][0][0].shape[1]

model = EEGNetv4(
    n_chans,
    n_classes,
    input_window_samples = window_size, #input_window_samples,
    final_conv_length='auto',
)

# Send model to GPU
if cuda:
    model.cuda()

In [38]:
# Training time
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from braindecode import EEGClassifier

lr = 1 * 0.02 
weight_decay = 0.5 * 0.001

clfs_list = []
for i in range(len(epochs_list_train)):
    clfs_list.append(
        EEGClassifier(
                    model,
                    criterion=torch.nn.NLLLoss,
                    optimizer=torch.optim.AdamW,
                    #train_split=predefined_split(train_set),  # using valid_set for validation
                    optimizer__lr=lr,
                    optimizer__weight_decay=weight_decay,
                    batch_size=batch_size,
                    callbacks=[
                        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
                    ],
                    device=device,
                    )
                )

In [39]:
def training_function(subject_index=0):
    print('\n', '#'*25, 'Training for Subject:', subject_index+1, '#'*25, '\n')
    dataset = windows_datasets_list[subject_index]
    clfs_list[subject_index].fit(dataset, y=dataset.description.labels, epochs=n_epochs);
    best_validation_acc = clfs_list[subject_index].callbacks_[4][1].best_score_ # a hack to get best validation accuracy
    best_validation_kappa = (2*best_validation_acc)-1
    print("Best Cross Validation Kappa Score: {:.2f}".format(best_validation_kappa))

In [40]:
for subject in range(len(training_files)):
    training_function(subject)


 ######################### Training for Subject: 1 ######################### 

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.5000[0m        [32m0.8045[0m            [35m0.5000[0m        [31m4.2305[0m  0.0200  7.0333
      2            0.5000        [32m0.5399[0m            0.5000        5.3087  0.0199  5.9203
      3            [36m0.6840[0m        [32m0.3828[0m            [35m0.5069[0m        [31m3.6704[0m  0.0195  5.2713
      4            0.6753        [32m0.2885[0m            [35m0.5139[0m        3.8709  0.0188  4.7727
      5            [36m0.8490[0m        0.3193            [35m0.6181[0m        [31m1.6126[0m  0.0179  4.7089
      6            0.7344        [32m0.2852[0m            0.5278        2.9063  0.0168  4.6951
      7            [36m0.9306[0m        [32m0.2309[0m            [35m0.6250

     11            0.9965        0.0567            0.6458        2.9023  0.0092  5.1639
     12            [36m1.0000[0m        0.1357            0.6181        1.9206  0.0075  4.8081
     13            1.0000        [32m0.0270[0m            0.6319        2.1861  0.0060  4.5469
     14            0.9983        0.0425            0.6389        2.4431  0.0045  4.7331
     15            1.0000        0.0370            0.6181        1.6782  0.0032  4.5736
     16            1.0000        0.0417            0.6389        2.3200  0.0021  4.7910
     17            1.0000        0.0346            0.6528        2.1852  0.0012  4.5379
     18            1.0000        [32m0.0245[0m            0.6458        2.2942  0.0005  4.4728
     19            1.0000        0.0359            0.6528        2.2856  0.0001  4.8070
     20            1.0000        0.0389            0.6528        2.2507  0.0000  4.7285
Best Cross Validation Kappa Score: 0.40

 ######################### Training for Subject: 5 #

      3            [36m0.8646[0m        [32m0.4170[0m            [35m0.7431[0m        [31m0.5289[0m  0.0195  4.8693
      4            [36m0.9028[0m        [32m0.3524[0m            [35m0.7917[0m        [31m0.4134[0m  0.0188  6.0799
      5            0.5486        [32m0.3232[0m            0.5208        1.8897  0.0179  4.7583
      6            0.7188        [32m0.2822[0m            0.6319        0.9857  0.0168  4.7701
      7            [36m0.9323[0m        [32m0.1956[0m            [35m0.8472[0m        [31m0.3701[0m  0.0155  4.9785
      8            [36m0.9566[0m        [32m0.1949[0m            0.8125        0.4422  0.0140  4.9737
      9            0.7066        [32m0.1432[0m            0.5347        2.2145  0.0125  5.0568
     10            0.5729        [32m0.1379[0m            0.5208        2.2381  0.0108  5.2219
     11            0.9340        [32m0.1257[0m            0.7569        0.7963  0.0092  4.6846
     12            0.6285        [32

### It's Training Time with 0.5, 4.5 sec only with a bit more windows but with 10 epochs only

as the issue seems to be overfitting as training acc is increasing but valid isn't

In [41]:
from braindecode.datautil import create_from_mne_epochs

# convert epochs to braindecode compatible datastructure 
# 2sec windows with 0.250 sec stride
window_size = 1024 #50 # 3072
window_stride = 64 #256 # 50

windows_datasets_list = []
for epoch in epochs_list_train:
    windows_datasets_list.append(
            create_from_mne_epochs(
            [epoch.crop(tmin=0.5, tmax=4.5, include_tmax=False)], # [0.5, 4.5] s, expects list of epochs
            window_size_samples = window_size,
            window_stride_samples = window_stride,
            drop_last_window = False
        )
    )

In [42]:
def get_windows_datasets_labels(windows_dataset):
    labels = []
    for i in range(len(windows_dataset.datasets)):
        labels.extend(windows_dataset.datasets[i].y)
    return np.array(labels) 

for windows_dataset in windows_datasets_list:
    windows_dataset.description = pd.DataFrame(data=get_windows_datasets_labels(windows_dataset), 
                                           columns=['labels'])

In [46]:
print("Total Windows in a Single Dataset: ", len(windows_datasets_list[0].description))

Total Windows in a Single Dataset:  1360


In [47]:
from braindecode.datautil.preprocess import exponential_moving_standardize
from braindecode.datautil.preprocess import MNEPreproc, NumpyPreproc, preprocess

low_cut_hz = 7.  # low cut frequency for filtering
high_cut_hz = 32.  # high cut frequency for filtering
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

def custom_exp_moving_std_fn(epochs, factor_new=factor_new, init_block_size=init_block_size):
    data = epochs.get_data()
    for i in range(len(data)):
        epochs._data[i] = exponential_moving_standardize(data[i], 
                        factor_new=factor_new, init_block_size=init_block_size)
    return epochs

preprocessors = [
    # keep only EEG sensors
    MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
    # bandpass filter
    MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
    # exponential moving standardization
    MNEPreproc(fn=custom_exp_moving_std_fn, factor_new=factor_new,
        init_block_size=init_block_size)
]

In [48]:
for windows_dataset in windows_datasets_list: 
    preprocess(windows_dataset, preprocessors)

In [49]:
batch_size = 16 #64
n_epochs = 10 #20 #25 few epochs for quick verification

In [50]:
# Creating a model
import torch
from braindecode.util import set_random_seeds
from braindecode.models import ShallowFBCSPNet, EEGNetv4

cuda = torch.cuda.is_available()  # check if GPU is available, if True chooses to use it
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = True
seed = 20200220  # random seed to make results reproducible
# Set random seed to be able to reproduce results
set_random_seeds(seed=seed, cuda=cuda)

n_classes=2
# Extract number of chans and time steps from dataset
n_chans = windows_datasets_list[0][0][0].shape[0]
input_window_samples = windows_datasets_list[0][0][0].shape[1]

model = EEGNetv4(
    n_chans,
    n_classes,
    input_window_samples = window_size, #input_window_samples,
    final_conv_length='auto',
)

# Send model to GPU
if cuda:
    model.cuda()

In [51]:
# Training time
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from braindecode import EEGClassifier

lr = 1 * 0.02 
weight_decay = 0.5 * 0.001

clfs_list = []
for i in range(len(epochs_list_train)):
    clfs_list.append(
        EEGClassifier(
                    model,
                    criterion=torch.nn.NLLLoss,
                    optimizer=torch.optim.AdamW,
                    #train_split=predefined_split(train_set),  # using valid_set for validation
                    optimizer__lr=lr,
                    optimizer__weight_decay=weight_decay,
                    batch_size=batch_size,
                    callbacks=[
                        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
                    ],
                    device=device,
                    )
                )

In [52]:
def training_function(subject_index=0):
    print('\n', '#'*25, 'Training for Subject:', subject_index+1, '#'*25, '\n')
    dataset = windows_datasets_list[subject_index]
    clfs_list[subject_index].fit(dataset, y=dataset.description.labels, epochs=n_epochs);
    best_validation_acc = clfs_list[subject_index].callbacks_[4][1].best_score_ # a hack to get best validation accuracy
    best_validation_kappa = (2*best_validation_acc)-1
    print("Best Cross Validation Kappa Score: {:.2f}".format(best_validation_kappa))

In [53]:
for subject in range(len(training_files)):
    training_function(subject)


 ######################### Training for Subject: 1 ######################### 

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.8631[0m        [32m0.7389[0m            [35m0.6360[0m        [31m0.7081[0m  0.0200  9.8601
      2            0.5064        [32m0.3883[0m            0.5000        6.7701  0.0194  9.4981
      3            [36m0.9118[0m        [32m0.3356[0m            0.6360        1.2157  0.0177  9.2757
      4            [36m0.9724[0m        [32m0.2187[0m            [35m0.7022[0m        0.8782  0.0150  9.6769
      5            0.6360        [32m0.1798[0m            0.5588        1.9197  0.0117  9.2238
      6            [36m0.9881[0m        [32m0.1125[0m            0.6654        0.8798  0.0083  9.5259
      7            0.8989        [32m0.0927[0m            0.6471        1.0006  0.0050  9.2492
  

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.7178[0m        [32m0.7141[0m            [35m0.5184[0m        [31m1.2939[0m  0.0200  9.6275
      2            [36m0.7647[0m        [32m0.4544[0m            [35m0.7978[0m        [31m0.3998[0m  0.0194  9.1792
      3            [36m0.9081[0m        [32m0.3771[0m            0.7353        0.5899  0.0177  8.8885
      4            [36m0.9246[0m        [32m0.3164[0m            0.7206        0.7690  0.0150  8.9194
      5            0.5625        [32m0.2410[0m            0.5074        3.1520  0.0117  8.9581
      6            [36m0.9256[0m        [32m0.1964[0m            0.6581        1.0022  0.0083  9.1344
      7            [36m0.9301[0m        [32m0.1255[0m            0.6581        1.2392  0.0050  9.2434
      8            0.9274        [32m0.1230[0m       

In [None]:
#next time try whole trial once without much cropping

### It's Training Time with 0.5, 4.5 sec only with a single windows but with 10 epochs only

as the issue seems to be overfitting as training acc is increasing but valid isn't

In [92]:
from braindecode.datautil import create_from_mne_epochs

# convert epochs to braindecode compatible datastructure 
window_size = 2030 #50 # 3072
window_stride = 1024 #256 # 50

windows_datasets_list = []
for epoch in epochs_list_train:
    windows_datasets_list.append(
            create_from_mne_epochs(
            [epoch.crop(tmin=0.5, tmax=4.5, include_tmax=False)], # [0.5, 4.5] s, expects list of epochs
            window_size_samples = window_size,
            window_stride_samples = window_stride,
            drop_last_window = False
        )
    )

In [93]:
def get_windows_datasets_labels(windows_dataset):
    labels = []
    for i in range(len(windows_dataset.datasets)):
        labels.extend(windows_dataset.datasets[i].y)
    return np.array(labels) 

for windows_dataset in windows_datasets_list:
    windows_dataset.description = pd.DataFrame(data=get_windows_datasets_labels(windows_dataset), 
                                           columns=['labels'])

In [94]:
print("Total Windows in a Single Dataset: ", len(windows_datasets_list[0].description))

Total Windows in a Single Dataset:  160


In [95]:
from braindecode.datautil.preprocess import exponential_moving_standardize
from braindecode.datautil.preprocess import MNEPreproc, NumpyPreproc, preprocess

low_cut_hz = 7.  # low cut frequency for filtering
high_cut_hz = 32.  # high cut frequency for filtering
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

def custom_exp_moving_std_fn(epochs, factor_new=factor_new, init_block_size=init_block_size):
    data = epochs.get_data()
    for i in range(len(data)):
        epochs._data[i] = exponential_moving_standardize(data[i], 
                        factor_new=factor_new, init_block_size=init_block_size)
    return epochs

preprocessors = [
    # keep only EEG sensors
    MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
    # bandpass filter
    MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
    # exponential moving standardization
    MNEPreproc(fn=custom_exp_moving_std_fn, factor_new=factor_new,
        init_block_size=init_block_size)
]

In [96]:
for windows_dataset in windows_datasets_list: 
    preprocess(windows_dataset, preprocessors)

In [97]:
batch_size = 16 #64
n_epochs = 10 #20 #25 few epochs for quick verification

In [98]:
# Creating a model
import torch
from braindecode.util import set_random_seeds
from braindecode.models import ShallowFBCSPNet, EEGNetv4

cuda = torch.cuda.is_available()  # check if GPU is available, if True chooses to use it
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = True
seed = 20200220  # random seed to make results reproducible
# Set random seed to be able to reproduce results
set_random_seeds(seed=seed, cuda=cuda)

n_classes=2
# Extract number of chans and time steps from dataset
n_chans = windows_datasets_list[0][0][0].shape[0]
input_window_samples = windows_datasets_list[0][0][0].shape[1]

model = EEGNetv4(
    n_chans,
    n_classes,
    input_window_samples = window_size, #input_window_samples,
    final_conv_length='auto',
)

# Send model to GPU
if cuda:
    model.cuda()

In [99]:
# Training time
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from braindecode import EEGClassifier

lr = 1 * 0.02 
weight_decay = 0.5 * 0.001

clfs_list = []
for i in range(len(epochs_list_train)):
    clfs_list.append(
        EEGClassifier(
                    model,
                    criterion=torch.nn.NLLLoss,
                    optimizer=torch.optim.AdamW,
                    #train_split=predefined_split(train_set),  # using valid_set for validation
                    optimizer__lr=lr,
                    optimizer__weight_decay=weight_decay,
                    batch_size=batch_size,
                    callbacks=[
                        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
                    ],
                    device=device,
                    )
                )

In [100]:
def training_function(subject_index=0):
    print('\n', '#'*25, 'Training for Subject:', subject_index+1, '#'*25, '\n')
    dataset = windows_datasets_list[subject_index]
    clfs_list[subject_index].fit(dataset, y=dataset.description.labels, epochs=n_epochs);
    best_validation_acc = clfs_list[subject_index].callbacks_[4][1].best_score_ # a hack to get best validation accuracy
    best_validation_kappa = (2*best_validation_acc)-1
    print("Best Cross Validation Kappa Score: {:.2f}".format(best_validation_kappa))

In [101]:
for subject in range(len(training_files)):
    training_function(subject)


 ######################### Training for Subject: 1 ######################### 

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.6094[0m        [32m0.7857[0m            [35m0.5312[0m        [31m0.8298[0m  0.0200  1.9470
      2            0.5156        [32m0.4767[0m            0.5000        1.9009  0.0194  1.9160
      3            0.5000        [32m0.2453[0m            0.5000        3.4557  0.0177  1.8300
      4            0.5000        [32m0.1696[0m            0.5000        4.4461  0.0150  1.7920
      5            0.5000        [32m0.1138[0m            0.5000        5.5788  0.0117  1.7320
      6            0.5000        [32m0.0987[0m            0.5000        4.8779  0.0083  1.7310
      7            0.5000        [32m0.0527[0m            0.5000        4.7223  0.0050  1.7820
      8            0.5000        [32m

      2            [36m0.8125[0m        [32m0.7220[0m            [35m0.7500[0m        0.6651  0.0194  1.9600
      3            [36m0.9609[0m        [32m0.3669[0m            0.5625        1.5228  0.0177  2.0090
      4            0.9375        [32m0.2437[0m            0.5312        1.9882  0.0150  1.9020
      5            0.9531        [32m0.2038[0m            0.5000        2.0008  0.0117  1.9220
      6            0.9609        [32m0.1487[0m            0.5312        1.9483  0.0083  2.0790
      7            [36m0.9766[0m        0.1579            0.5625        1.7738  0.0050  2.0820
      8            [36m0.9844[0m        0.1627            0.5938        1.6637  0.0023  2.4940
      9            0.9844        [32m0.1048[0m            0.6250        1.5801  0.0006  1.9390
     10            0.9844        0.1310            0.6250        1.5213  0.0000  1.9710
Best Cross Validation Kappa Score: 0.50

 ######################### Training for Subject: 8 #################

Note: results seem better, may be I should try non-overlapping windows so all examples have their own contribution

### It's Training Time with 0.5, 4.5 sec with non-overlapping 2 windows but with 10 epochs only

as the issue seems to be overfitting as training acc is increasing but valid isn't

In [64]:
from braindecode.datautil import create_from_mne_epochs

# convert epochs to braindecode compatible datastructure 
# 2sec windows with 0.250 sec stride
window_size = 1024 #50 # 3072
window_stride = 1024 #256 # 50

windows_datasets_list = []
for epoch in epochs_list_train:
    windows_datasets_list.append(
            create_from_mne_epochs(
            [epoch.crop(tmin=0.5, tmax=4.5, include_tmax=False)], # [0.5, 4.5] s, expects list of epochs
            window_size_samples = window_size,
            window_stride_samples = window_stride,
            drop_last_window = False
        )
    )

In [65]:
def get_windows_datasets_labels(windows_dataset):
    labels = []
    for i in range(len(windows_dataset.datasets)):
        labels.extend(windows_dataset.datasets[i].y)
    return np.array(labels) 

for windows_dataset in windows_datasets_list:
    windows_dataset.description = pd.DataFrame(data=get_windows_datasets_labels(windows_dataset), 
                                           columns=['labels'])

In [66]:
print("Total Windows in a Single Dataset: ", len(windows_datasets_list[0].description))

Total Windows in a Single Dataset:  160


In [67]:
from braindecode.datautil.preprocess import exponential_moving_standardize
from braindecode.datautil.preprocess import MNEPreproc, NumpyPreproc, preprocess

low_cut_hz = 7.  # low cut frequency for filtering
high_cut_hz = 32.  # high cut frequency for filtering
# Parameters for exponential moving standardization
factor_new = 1e-3
init_block_size = 1000

def custom_exp_moving_std_fn(epochs, factor_new=factor_new, init_block_size=init_block_size):
    data = epochs.get_data()
    for i in range(len(data)):
        epochs._data[i] = exponential_moving_standardize(data[i], 
                        factor_new=factor_new, init_block_size=init_block_size)
    return epochs

preprocessors = [
    # keep only EEG sensors
    MNEPreproc(fn='pick_types', eeg=True, meg=False, stim=False),
    # bandpass filter
    MNEPreproc(fn='filter', l_freq=low_cut_hz, h_freq=high_cut_hz),
    # exponential moving standardization
    MNEPreproc(fn=custom_exp_moving_std_fn, factor_new=factor_new,
        init_block_size=init_block_size)
]

In [68]:
for windows_dataset in windows_datasets_list: 
    preprocess(windows_dataset, preprocessors)

In [69]:
batch_size = 16 #64
n_epochs = 10 #20 #25 few epochs for quick verification

In [70]:
# Creating a model
import torch
from braindecode.util import set_random_seeds
from braindecode.models import ShallowFBCSPNet, EEGNetv4

cuda = torch.cuda.is_available()  # check if GPU is available, if True chooses to use it
device = 'cuda' if cuda else 'cpu'
if cuda:
    torch.backends.cudnn.benchmark = True
seed = 20200220  # random seed to make results reproducible
# Set random seed to be able to reproduce results
set_random_seeds(seed=seed, cuda=cuda)

n_classes=2
# Extract number of chans and time steps from dataset
n_chans = windows_datasets_list[0][0][0].shape[0]
input_window_samples = windows_datasets_list[0][0][0].shape[1]

model = EEGNetv4(
    n_chans,
    n_classes,
    input_window_samples = window_size, #input_window_samples,
    final_conv_length='auto',
)

# Send model to GPU
if cuda:
    model.cuda()

In [71]:
# Training time
from skorch.callbacks import LRScheduler
from skorch.helper import predefined_split
from braindecode import EEGClassifier

lr = 1 * 0.02 
weight_decay = 0.5 * 0.001

clfs_list = []
for i in range(len(epochs_list_train)):
    clfs_list.append(
        EEGClassifier(
                    model,
                    criterion=torch.nn.NLLLoss,
                    optimizer=torch.optim.AdamW,
                    #train_split=predefined_split(train_set),  # using valid_set for validation
                    optimizer__lr=lr,
                    optimizer__weight_decay=weight_decay,
                    batch_size=batch_size,
                    callbacks=[
                        "accuracy", ("lr_scheduler", LRScheduler('CosineAnnealingLR', T_max=n_epochs - 1)),
                    ],
                    device=device,
                    )
                )

In [72]:
def training_function(subject_index=0):
    print('\n', '#'*25, 'Training for Subject:', subject_index+1, '#'*25, '\n')
    dataset = windows_datasets_list[subject_index]
    clfs_list[subject_index].fit(dataset, y=dataset.description.labels, epochs=n_epochs);
    best_validation_acc = clfs_list[subject_index].callbacks_[4][1].best_score_ # a hack to get best validation accuracy
    best_validation_kappa = (2*best_validation_acc)-1
    print("Best Cross Validation Kappa Score: {:.2f}".format(best_validation_kappa))

In [73]:
for subject in range(len(training_files)):
    training_function(subject)


 ######################### Training for Subject: 1 ######################### 

  epoch    train_accuracy    train_loss    valid_accuracy    valid_loss      lr     dur
-------  ----------------  ------------  ----------------  ------------  ------  ------
      1            [36m0.6641[0m        [32m0.8516[0m            [35m0.5625[0m        [31m0.7750[0m  0.0200  1.1591
      2            0.5156        [32m0.6555[0m            [35m0.5938[0m        1.5629  0.0194  1.0865
      3            0.5000        [32m0.4358[0m            0.5312        3.2451  0.0177  1.0006
      4            0.5000        [32m0.4021[0m            0.5000        5.8631  0.0150  1.0866
      5            0.5000        [32m0.3263[0m            0.5000        6.2139  0.0117  0.9545
      6            0.5000        [32m0.2675[0m            0.5000        5.5091  0.0083  1.3795
      7            0.5000        [32m0.1801[0m            0.5000        5.0791  0.0050  1.2650
      8            0.5000    

      2            [36m0.5703[0m        [32m0.6242[0m            [35m0.5625[0m        [31m1.1025[0m  0.0194  1.2680
      3            [36m0.6328[0m        [32m0.5345[0m            0.5625        [31m0.8642[0m  0.0177  1.1420
      4            [36m0.7031[0m        [32m0.4364[0m            [35m0.6250[0m        [31m0.7926[0m  0.0150  1.3540
      5            [36m0.8672[0m        [32m0.3806[0m            [35m0.6562[0m        [31m0.6081[0m  0.0117  1.1600
      6            [36m0.9141[0m        [32m0.2874[0m            [35m0.6875[0m        [31m0.5892[0m  0.0083  1.1540
      7            0.8906        [32m0.2688[0m            0.6250        [31m0.5826[0m  0.0050  1.1120
      8            0.8672        [32m0.2622[0m            0.6562        0.5884  0.0023  0.9950
      9            0.8906        [32m0.2316[0m            0.6250        0.5893  0.0006  0.9900
     10            0.8984        0.2479            0.6250        0.5870  0.0000  1.0570
B

## Note
Somehow the problem seems to be with data augmentation as training acc is increasing but validation one isn't. Moreover, we need a large samples not just a hundred to get some results. Maybe cropped decoding would be better but who knows!