# Results visualization
## imports

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

## Load results and sanity check

In [2]:
results = []
for path in Path('results').glob('*.csv'):
    df = pd.read_csv(path)
    df['file'] = path.name
    results.append(df)

results = pd.concat(results)

In [3]:
results

Unnamed: 0,pretrain_dur,finetune_phase1_dur,finetune_phase2_dur,pretrain_n_epochs,finetune_phase1_n_epochs,finetune_phase2_n_epochs,pretrain_train_size,pretrain_valid_size,finetune_phase1_train_size,finetune_phase1_valid_size,...,finetune_subject,finetune_scheme,finetune_fold,pretrain_train_acc,pretrain_valid_acc,finetune_phase1_train_acc,finetune_phase1_valid_acc,finetune_phase2_valid_acc,test_acc,file
0,50.338649,0.684618,0.209419,143,29,36,4620,519,480,131,...,8,Scheme 1,0,0.543939,0.500963,0.677083,0.740458,0.740458,0.65000,pre-BNCI2014_001_fin-Schirrmeister2017_sub-8.csv
1,50.338649,3.736437,1.792514,143,152,212,4620,519,480,131,...,8,Scheme 1,1,0.543939,0.500963,0.785417,0.656489,0.793893,0.72500,pre-BNCI2014_001_fin-Schirrmeister2017_sub-8.csv
2,50.338649,0.973701,0.627491,143,40,61,4620,519,480,131,...,8,Scheme 1,2,0.543939,0.500963,0.731250,0.679389,0.725191,0.65000,pre-BNCI2014_001_fin-Schirrmeister2017_sub-8.csv
3,50.338649,1.522235,0.782921,143,62,88,4620,519,480,131,...,8,Scheme 1,3,0.543939,0.500963,0.760417,0.717557,0.801527,0.68125,pre-BNCI2014_001_fin-Schirrmeister2017_sub-8.csv
4,50.338649,0.871628,0.362048,143,35,47,4620,519,480,131,...,8,Scheme 1,4,0.543939,0.500963,0.725000,0.687023,0.740458,0.68125,pre-BNCI2014_001_fin-Schirrmeister2017_sub-8.csv
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,50.338649,8.533252,0.325035,143,170,175,4620,519,660,176,...,12,Scheme 4,0,0.543939,0.500963,0.865152,0.875000,0.897727,0.76250,pre-BNCI2014_001_fin-Schirrmeister2017_sub-12.csv
16,50.338649,9.719258,0.260761,143,200,204,4620,519,660,176,...,12,Scheme 4,1,0.543939,0.500963,0.878788,0.892045,0.892045,0.79375,pre-BNCI2014_001_fin-Schirrmeister2017_sub-12.csv
17,50.338649,4.648522,0.387688,143,94,100,4620,519,660,176,...,12,Scheme 4,2,0.543939,0.500963,0.859091,0.846591,0.869318,0.76250,pre-BNCI2014_001_fin-Schirrmeister2017_sub-12.csv
18,50.338649,18.205695,0.457421,143,371,378,4620,519,660,176,...,12,Scheme 4,3,0.543939,0.500963,0.898485,0.880682,0.886364,0.81250,pre-BNCI2014_001_fin-Schirrmeister2017_sub-12.csv


In [4]:
# check the number of results
results.groupby(
    # ['pretrain_dataset', 'finetune_dataset', 'finetune_subject']
    ['pretrain_dataset', 'finetune_dataset']  # n_subjects * 5 folds * 4 schemes
).test_acc.count()

pretrain_dataset   finetune_dataset 
BNCI2014_001       BNCI2014_001         180
                   BNCI2014_004         180
                   Schirrmeister2017    280
BNCI2014_004       BNCI2014_001         180
                   BNCI2014_004         180
                   Schirrmeister2017    280
Lee2019_MI         BNCI2014_001         180
                   BNCI2014_004         180
                   Schirrmeister2017    280
PhysionetMI        BNCI2014_001         180
                   BNCI2014_004         180
                   Schirrmeister2017    280
Schirrmeister2017  BNCI2014_001         180
                   BNCI2014_004         180
                   Schirrmeister2017    280
Name: test_acc, dtype: int64

In [7]:
# check that all experiments logged the same metrics for the same pretraining phase
assert (results.groupby('pretrain_dataset')[[
    'pretrain_dur',
    'pretrain_n_epochs',
    'pretrain_train_size',
    'pretrain_valid_size',
    'pretrain_dataset',
    'pretrain_train_acc',
    'pretrain_valid_acc',
]].nunique() == 1).all().all()

In [15]:
# check amount of data in the different subsets
# TODO inconsistent with Xie2023
print(
    results.groupby('finetune_dataset')[['finetune_phase1_train_size', 'finetune_phase1_valid_size', 'test_size']].agg(
        ['mean', 'std', 'min', 'max']))

                  finetune_phase1_train_size                       \
                                        mean        std  min  max   
finetune_dataset                                                    
BNCI2014_001                           180.0    0.00000  180  180   
BNCI2014_004                           300.0    0.00000  300  300   
Schirrmeister2017                      600.0  113.42986  240  660   

                  finetune_phase1_valid_size                       test_size  \
                                        mean        std min  max        mean   
finetune_dataset                                                               
BNCI2014_001                       58.000000   0.000000  58   58  288.000000   
BNCI2014_004                       81.777778   2.741263  80   88  315.555556   
Schirrmeister2017                 160.714286  30.204044  64  180  160.000000   

                                        
                         std  min  max  
finetune_dataset      

## Durations of the experiment phases

In [8]:
# Pre-training
results.groupby('pretrain_dataset').first().pretrain_dur

pretrain_dataset
BNCI2014_001          50.338649
BNCI2014_004          42.910408
Lee2019_MI            31.089299
PhysionetMI           54.676333
Schirrmeister2017    328.381266
Name: pretrain_dur, dtype: float64

In [16]:
# Fine-tuning
# finetuning longer on Schirrmeister
results.groupby(['finetune_dataset', 'finetune_scheme'])[['finetune_phase1_dur', 'finetune_phase2_dur']].agg(
    ['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,finetune_phase1_dur,finetune_phase1_dur,finetune_phase2_dur,finetune_phase2_dur
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std
finetune_dataset,finetune_scheme,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
BNCI2014_001,Scheme 1,0.804482,0.61967,0.588451,0.491205
BNCI2014_001,Scheme 2,1.5256,1.006604,0.76412,0.573649
BNCI2014_001,Scheme 3,1.396093,0.845244,0.669113,0.420636
BNCI2014_001,Scheme 4,1.693155,1.083675,0.804911,0.557677
BNCI2014_004,Scheme 1,0.979419,0.957452,1.295326,8.956946
BNCI2014_004,Scheme 2,1.639533,1.426904,1.001778,1.230883
BNCI2014_004,Scheme 3,1.498243,1.327145,0.872306,1.39403
BNCI2014_004,Scheme 4,1.671008,1.436427,0.903049,0.933934
Schirrmeister2017,Scheme 1,3.009236,2.780488,2.399558,11.536148
Schirrmeister2017,Scheme 2,6.273115,4.278509,1.888961,1.774808


## Scores

In [18]:
# main test results
results.groupby(['pretrain_dataset', 'finetune_dataset', 'finetune_scheme']).test_acc.agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,mean,std
pretrain_dataset,finetune_dataset,finetune_scheme,Unnamed: 3_level_1,Unnamed: 4_level_1
BNCI2014_001,BNCI2014_001,Scheme 1,0.502083,0.083745
BNCI2014_001,BNCI2014_001,Scheme 2,0.53017,0.093548
BNCI2014_001,BNCI2014_001,Scheme 3,0.531713,0.095271
BNCI2014_001,BNCI2014_001,Scheme 4,0.532485,0.096558
BNCI2014_001,BNCI2014_004,Scheme 1,0.682014,0.130091
BNCI2014_001,BNCI2014_004,Scheme 2,0.704593,0.131718
BNCI2014_001,BNCI2014_004,Scheme 3,0.701151,0.133244
BNCI2014_001,BNCI2014_004,Scheme 4,0.722778,0.150511
BNCI2014_001,Schirrmeister2017,Scheme 1,0.579375,0.094117
BNCI2014_001,Schirrmeister2017,Scheme 2,0.667411,0.089956
