# Notebook for Result Visualization

This notebook generates all the figures that summarize
the results of the data
partition analysis. It is divided into three main subsections:

1. Intra-subject vs Inter-subject variability
2. LNSO vs N-LNSO
3. LOSO vs Full N-LOSO


## Import packages

In [None]:
import glob
import os
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import matplotlib.gridspec as gridspec
from matplotlib.ticker import MultipleLocator
from scipy import stats
import seaborn as sns
import math
from AllFnc.utilities import (
    GetLearningRateString,
    get_full_name,
    gather_metric_values,
    convert_performance_totable
)

import warnings 
# Suppress every FutureWarning raised while the notebook runs.
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Panel labels used to annotate the sub-figures, in plotting order.
letters = list("ABCD")

In [None]:
# Plot theme selection. `style` must be "dark" or "light"; it selects both the
# seaborn configuration and the sub-folder of Images/ where figures are saved.
style = "light"

# rc overrides applied on top of seaborn's "darkgrid" style.
custom_params_dark = {
    'figure.facecolor': 'white',
    'axes.labelcolor': '.15',
    'xtick.direction': 'out',
    'ytick.direction': 'out',
    'xtick.color': '.15',
    'ytick.color': '.15',
    'axes.axisbelow': True,
    'grid.linestyle': '-',
    'text.color': '.15',
    'font.family': ['sans-serif'],
    'font.sans-serif': [
        'Arial',
        'DejaVu Sans',
        'Liberation Sans',
        'Bitstream Vera Sans',
        'sans-serif'
    ],
    'lines.solid_capstyle': 'round',
    'patch.edgecolor': 'w',
    'patch.force_edgecolor': True,
    'image.cmap': 'rocket',
    'xtick.top': False,
    'ytick.right': False,
    'axes.grid': True,
    'axes.facecolor': '#EAEAF2',
    'axes.edgecolor': '#0072b2',
    'grid.color': 'white',
    'axes.spines.left': True,
    'axes.spines.bottom': True,
    'axes.spines.right': True,
    'axes.spines.top': True,
    'xtick.bottom': False,
    'ytick.left': False
}
# rc overrides applied on top of seaborn's "ticks" style.
custom_params_light = {
    "axes.spines.right": False,
    "axes.spines.top": False,
    'axes.grid': True,
    'grid.linestyle': '-',
    'grid.color': 'lightgray',
}
if style == "dark":
    folder = "DarkTheme"
    sns.set_style("darkgrid", rc = custom_params_dark)
elif style == "light":
    folder = "LightTheme"
    sns.set_theme(style="ticks", rc=custom_params_light)
else:
    # Fail here rather than with a NameError on `folder` in a later cell.
    raise ValueError(f"Unknown style {style!r}: expected 'dark' or 'light'")
sns.set_context("paper", font_scale=1.5)

# Every figure cell saves into Images/<folder>/; make sure it exists so that
# plt.savefig does not fail on a fresh checkout.
os.makedirs(os.path.join("Images", folder), exist_ok=True)

## Intra-subject (KFOLD) vs Inter-subject (LNSO)

In [None]:
# Metric to load and the task / model / partition codes to compare
# (sample-based K-Fold against Leave-N-Subjects-Out).
metric = "accuracy_weighted"
task = ["pds", "alz", "bci"]
model = ["shn", "egn", "dcn", "res"]
partition = ["KFOLD", "LNSO"]

# Gather the stored metric values, then convert them to the table that the
# plotting cells below filter by "Model", "Task" and "Partition".
performances = gather_metric_values(metric, task, model, partition)
performances_df = convert_performance_totable(performances)

In [None]:
# Figure: 2x2 grid (one panel per model) comparing the accuracies obtained
# with sample-based K-Fold and Leave-N-Subjects-Out on every task.
font = 25
letters = ["A", "B", "C", "D"]
fig, ax = plt.subplots(2, 2, figsize=(19.72, 15.5))
for n, i in enumerate(model):
    row, col = n // 2, n % 2
    panel = ax[row, col]
    # Take an explicit copy: the task codes are replaced by display names
    # below, and writing into a slice of performances_df raises pandas'
    # SettingWithCopyWarning.
    model_df = performances_df.loc[performances_df["Model"] == i].copy()
    model_df["Task"] = model_df["Task"].apply(get_full_name)
    sns.stripplot(
        x         = 'Task',
        y         = 'Metric',
        data      = model_df,
        legend    = False,
        linewidth = 1,
        hue       = 'Partition',
        dodge     = True,
        ax        = panel,
        size      = 12,
        palette   = ["#56b4e9", "#e69f00"],
        alpha     = 0.9
    )
    panel.set_yticks(list(range(20, 101, 10)))
    panel.set_title(f'Model: {get_full_name(i)}', fontsize=font + 3, pad=16)
    # Only the bottom row shows the x label and only the left column the y label.
    if row == 1:
        panel.set_xlabel('Task', fontsize=font, labelpad=12)
    else:
        panel.xaxis.label.set_visible(False)
    if col == 0:
        panel.set_ylabel('Balanced Accuracy %', fontsize=font)
    else:
        panel.yaxis.label.set_visible(False)
    panel.set_ylim(25, 103)
    panel.tick_params(axis='both', which='major', labelsize=font - 5)
    # Panel letter, positioned in data coordinates.
    panel.text(2.2, 93, f'$({letters[n]})$', fontsize=font + 6)
    for side in ['top', 'bottom', 'left', 'right']:
        panel.spines[side].set_linewidth(1.5)
    panel.legend(
        ["Sample-based K-Fold", "Leave-N-Subjects-Out"],
        fontsize=font - 7.5,
        loc="lower left",
    )

fig.suptitle('Sample-based K-Fold vs. Leave-N-Subjects-Out', fontsize=font + 8)
plt.subplots_adjust(top=0.9, hspace=0.25, wspace=0.25)
plt.savefig(f"Images/{folder}/Kfold_vs_LNSO_concat_vertical.pdf", bbox_inches='tight')
plt.show()

In [None]:
# Figures: one standalone K-Fold vs Leave-N-Subjects-Out strip plot per model.
font = 22
for n, i in enumerate(model):
    # Explicit copy so that renaming the tasks does not write into a slice of
    # performances_df (SettingWithCopyWarning).
    model_df = performances_df.loc[performances_df["Model"] == i].copy()
    model_df["Task"] = model_df["Task"].apply(get_full_name)
    fig, ax = plt.subplots(figsize=(8, 7.2))
    sns.stripplot(
        x         = 'Task',
        y         = 'Metric',
        data      = model_df,
        legend    = False,
        linewidth = 1,
        hue       = 'Partition',
        dodge     = True,
        ax        = ax,
        size      = 9,
        palette   = ["#56b4e9", "#e69f00"],
        alpha     = 0.9
    )
    ax.set_yticks(list(range(20, 101, 10)))
    ax.set_title(f'Model: {get_full_name(i)}', fontsize=font + 3, pad=12)
    ax.set_xlabel('Task', fontsize=font, labelpad=12)
    ax.set_ylabel('Balanced Accuracy %', fontsize=font)
    ax.set_ylim(25, 103)
    ax.tick_params(axis='both', which='major', labelsize=font - 5)
    # Panel letter, positioned in data coordinates.
    ax.text(2.2, 93, f'$({letters[n]})$', fontsize=font + 6)
    for side in ['top', 'bottom', 'left', 'right']:
        ax.spines[side].set_linewidth(1.5)
    # Use the Axes method rather than the pyplot state machine.
    ax.legend(
        ["Sample-based K-Fold", "Leave-N-Subjects-Out"],
        fontsize=font - 6.1,
        loc="lower left",
    )
    plt.savefig(f"Images/{folder}/Kfold_vs_LNSO_model_{get_full_name(i)}.pdf", bbox_inches='tight')
    plt.show()
    # Release the figure explicitly; this loop creates one figure per model.
    plt.close(fig)

## Quantitative analysis using additional nested level

Directly comparing the results of subject-based and sample-based cross-validation methods
with a standard statistical test is not possible. The folds within each group are not completely independent
(there is a 10% overlap), nor can the two groups be considered paired. Neither the Wilcoxon signed-rank test
nor the Mann-Whitney U test can therefore be used in this context. Additionally, any statistical test
will tell if a mode

In [None]:
# Metric to load and the task / model / partition codes to compare
# (nested K-Fold against nested Leave-N-Subjects-Out).
metric = "accuracy_weighted"
task = ["pds", "alz", "bci"]
model = ["shn", "egn", "dcn", "res"]
partition = ["NKFOLD", "NLNSO"]

# The extra `True` flag makes gather_metric_values return a second set of
# values; both sets are merged into one table with "Metric" and "MetricVal"
# columns, which the next cells aggregate.
performances, performances_val = gather_metric_values(
    metric, task, model, partition, True
)
performances_df = convert_performance_totable(performances, performances_val)

In [None]:
# Average "Metric" and "MetricVal" within every (Partition, Task, Model, Outer)
# group in one built-in aggregation, instead of two Python-level
# groupby.apply calls (which also required pandas >= 2.2 for `include_groups`).
grouped = performances_df.groupby(["Partition", "Task", "Model", "Outer"])
group_means = grouped[["Metric", "MetricVal"]].mean().reset_index()

# df2: group keys + mean "Metric"; df3: group keys + mean "MetricVal" (stored
# under the name "Metric", as before). Both share the same row order.
df2 = group_means.drop(columns="MetricVal")
df3 = group_means.drop(columns="Metric").rename(columns={"MetricVal": "Metric"})

# Bias plotted in the next cell: mean MetricVal minus mean Metric per group.
df2["MetricDiff"] = df3["Metric"] - df2["Metric"]

In [None]:
# Figure: 2x2 grid (one panel per model) of the accuracy estimation bias
# ("MetricDiff" in df2) for the two partition schemes, per task.
font = 25
letters = ["A", "B", "C", "D"]
fig, ax = plt.subplots(2, 2, figsize=(19.72, 15.5))
for n, i in enumerate(model):
    row, col = n // 2, n % 2
    panel = ax[row, col]
    # Explicit copy so that renaming the tasks does not write into a slice of
    # df2 (SettingWithCopyWarning).
    model_df = df2.loc[df2["Model"] == i].copy()
    model_df["Task"] = model_df["Task"].apply(get_full_name)
    sns.stripplot(
        x         = 'Task',
        y         = 'MetricDiff',
        data      = model_df,
        legend    = False,
        linewidth = 1,
        hue       = 'Partition',
        dodge     = True,
        ax        = panel,
        size      = 12,
        palette   = ["#56b4e9", "#e69f00"],
        alpha     = 0.9
    )
    panel.set_yticks(list(range(-60, 101, 10)))
    panel.set_title(f'Model: {get_full_name(i)}', fontsize=font + 3, pad=16)
    # Only the bottom row shows the x label and only the left column the y label.
    if row == 1:
        panel.set_xlabel('Task', fontsize=font, labelpad=12)
    else:
        panel.xaxis.label.set_visible(False)
    if col == 0:
        panel.set_ylabel('Bias Accuracy Balanced', fontsize=font)
    else:
        panel.yaxis.label.set_visible(False)
    panel.set_ylim(-30, 80)
    panel.tick_params(axis='both', which='major', labelsize=font - 5)
    # Panel letter, positioned in data coordinates.
    panel.text(2.2, 72, f'$({letters[n]})$', fontsize=font + 6)
    for side in ['top', 'bottom', 'left', 'right']:
        panel.spines[side].set_linewidth(1.5)
    panel.legend(
        ["Sample-based K-Fold", "Leave-N-Subjects-Out"],
        fontsize=font - 7.5,
        loc="lower right",
    )

fig.suptitle('Accuracy Estimation Bias - Sample-based K-Fold vs LNSO', fontsize=font + 8)
plt.subplots_adjust(top=0.9, hspace=0.25, wspace=0.25)
plt.savefig(f"Images/{folder}/Bias_Kfold_vs_LNSO_concat.pdf", bbox_inches='tight')
plt.show()

## LNSO vs N-LNSO

In [None]:
# Metric to load and the task / model / partition codes to compare
# (Leave-N-Subjects-Out against its nested variant).
metric = "accuracy_weighted"
task = ["pds", "alz", "bci"]
model = ["shn", "egn", "dcn", "res"]
partition = ["LNSO", "NLNSO"]

# The extra `True` flag makes gather_metric_values return a second set of
# values; each set is converted to its own table.
performances, performancesval = gather_metric_values(
    metric, task, model, partition, True
)
performances_df = convert_performance_totable(performances)
performancesval_df = convert_performance_totable(performancesval)

In [None]:
# Human-readable task labels used for the scatter-plot legends.
# NOTE: the new column is deliberately named "Task " (trailing space) so that
# it does not overwrite the "Task" code column; the plots below use it as hue.
task_display_names = {'pds': 'Parkinson', 'alz':'Alzheimer', 'bci':'BCI'}
performances_df["Task "] = performances_df["Task"].apply(
    lambda code: task_display_names.get(code)
)

In [None]:
# Figures: for every model, scatter each N-LNSO accuracy against the LNSO
# accuracy looked up for the same outer fold (all tasks pooled), with a
# regression line and the identity line for reference.
font = 22
for n, i in enumerate(model):
    per_task_frames = []
    for k in task:
        in_task_model = (performances_df['Task'] == k) & (performances_df['Model'] == i)
        lnso = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "LNSO"), "Metric"
        ].values
        nlnso_pd = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "NLNSO")
        ]
        # Replace the outer-fold number (used as a 1-based index into `lnso`)
        # with the LNSO accuracy of that fold. `assign` returns a new frame,
        # so no float values are written into an integer column of a slice.
        nlnso_pd = nlnso_pd.assign(Outer=lnso[nlnso_pd['Outer'].values - 1])
        per_task_frames.append(nlnso_pd)
    # Concatenate once; errors are no longer hidden behind a bare `except`.
    nlnso_pd_tot = pd.concat(per_task_frames)

    fig, ax = plt.subplots(figsize=(8, 7.2))
    sns.scatterplot(
        data=nlnso_pd_tot,
        x="Outer",
        y="Metric",
        hue="Task ",
        ax=ax,
        legend=True,
        alpha = 0.8,
        palette=["#e69f00", "#009e73", "#f0e442"],
        edgecolors='black',
        zorder=2
    )
    ax.legend(fontsize = font-6, loc = "lower right", markerscale=2)
    # Regression line and confidence band only (scatter=False); the points are
    # already drawn by the scatterplot above.
    sns.regplot(
        data=nlnso_pd_tot, x="Outer", y="Metric",
        ax=ax, color='#1f77b4', ci=95,
        scatter=False, fit_reg=True,
        line_kws = {'color': "#0072b2", 'linewidth': 2}
    )
    # collections[0] is the scatter drawn first on this Axes.
    ax.collections[0].set_sizes([50])
    # Identity line: points below it have a lower N-LNSO than LNSO accuracy.
    ax.plot(np.arange(101), np.arange(101), color= 'black', zorder=1 )
    ax.set_yticks(list(range(0, 101, 10)))
    ax.set_xticks(list(range(0, 101, 10)))
    ax.set_title(f'Model: {get_full_name(i)}', fontsize = font+3, pad=16)
    ax.set_xlabel('LNSO Balanced Accuracy %', fontsize = font)
    ax.set_ylabel('N-LNSO Balanced Accuracy %', fontsize = font)
    ax.set_ylim(18,102)
    ax.set_xlim(18,102)
    ax.tick_params(axis='both', which='major', labelsize=font-5)
    ax.text(21,93,f'$({letters[n]})$',fontsize = font+6)
    for side in ['top','bottom','left','right']:
        ax.spines[side].set_linewidth(1.5)
    file_name = f"Images/{folder}/LNSO_vs_NLNSO_model_{get_full_name(i)}_all_tasks.pdf"
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()
    # Release the figure explicitly; this loop creates one figure per model.
    plt.close(fig)

In [None]:
# Figure: 2x2 grid (one panel per model) scattering each N-LNSO accuracy
# against the LNSO accuracy looked up for the same outer fold (all tasks
# pooled), with a regression line and the identity line for reference.
font = 25
fig, ax = plt.subplots(2, 2, figsize=(19.72, 17.7))
for n, i in enumerate(model):
    row, col = n // 2, n % 2
    panel = ax[row, col]
    per_task_frames = []
    for k in task:
        in_task_model = (performances_df['Task'] == k) & (performances_df['Model'] == i)
        lnso = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "LNSO"), "Metric"
        ].values
        nlnso_pd = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "NLNSO")
        ]
        # Replace the outer-fold number (used as a 1-based index into `lnso`)
        # with the LNSO accuracy of that fold. `assign` returns a new frame,
        # so no float values are written into an integer column of a slice.
        nlnso_pd = nlnso_pd.assign(Outer=lnso[nlnso_pd['Outer'].values - 1])
        per_task_frames.append(nlnso_pd)
    # Concatenate once; errors are no longer hidden behind a bare `except`.
    nlnso_pd_tot = pd.concat(per_task_frames)

    sns.scatterplot(
        data=nlnso_pd_tot,
        x="Outer",
        y="Metric",
        hue="Task ",
        ax=panel,
        legend=True,
        alpha = 0.8,
        palette=["#e69f00", "#009e73", "#f0e442"],
        edgecolors='black',
        zorder=2
    )
    panel.legend(fontsize = font-7.5, loc = "lower right", markerscale=2)

    # Regression line and confidence band only (scatter=False); the points are
    # already drawn by the scatterplot above.
    sns.regplot(
        data=nlnso_pd_tot, x="Outer", y="Metric",
        ax=panel, color='#1f77b4', ci=95,
        scatter=False, fit_reg=True,
        line_kws = {'color': "#0072b2", 'linewidth': 2}
    )

    # collections[0] is the scatter drawn first on this Axes.
    panel.collections[0].set_sizes([90])
    # Identity line: points below it have a lower N-LNSO than LNSO accuracy.
    panel.plot(np.arange(101), np.arange(101), zorder = 1, color= "black")

    # Only the bottom row shows the x label and only the left column the y label.
    if row == 1:
        panel.set_xlabel('LNSO Balanced Accuracy %', fontsize = font, labelpad=12)
    else:
        panel.xaxis.label.set_visible(False)

    if col == 0:
        panel.set_ylabel('N-LNSO Balanced Accuracy %', fontsize = font)
    else:
        panel.yaxis.label.set_visible(False)

    panel.set_yticks(list(range(0, 101, 10)))
    panel.set_xticks(list(range(0, 101, 10)))
    panel.set_title(f'Model: {get_full_name(i)}', fontsize = font+3)
    panel.set_ylim(18,102)
    panel.set_xlim(18,102)
    panel.tick_params(axis='both', which='major', labelsize=font-5)
    panel.text(21,93,f'$({letters[n]})$',fontsize = font+6)
    for side in ['top','bottom','left','right']:
        panel.spines[side].set_linewidth(1.5)
fig.suptitle('Leave-N-Subjects-Out (LNSO) vs. Nested-Leave-N-Subjects-Out (N-LNSO)', fontsize= font+7)
plt.subplots_adjust(top=0.9, hspace=0.25, wspace=0.25)
plt.savefig(f"Images/{folder}/LNSO_vs_NLNSO_all_tasks_vertical.pdf", bbox_inches='tight')
plt.show()

## Quantitative analysis using additional nested level

Directly comparing the results of subject-based and nested-subject-based cross-validation methods
with a standard statistical test is not possible. The folds within each group are not completely independent, nor can the two groups be considered paired.
Neither the Wilcoxon signed-rank test nor the Mann-Whitney U test can therefore be used in this context.

In [None]:
# Metric to load and the task / model / partition codes to compare
# (NLNSO against NNLNSO, the variant with one additional nested level).
metric = "accuracy_weighted"
task = ["pds", "alz", "bci"]
model = ["shn", "egn", "dcn", "res"]
partition = ["NLNSO", "NNLNSO"]

# With both extra flags set, gather_metric_values returns three sets of
# values; they are merged into a single table ("Metric", "MetricVal" and
# "MetricTest" columns are used by the next cell).
performances, performancesval, performancestest = gather_metric_values(
    metric, task, model, partition, True, True
)
performances_df = convert_performance_totable(
    performances, performancesval, performancestest
)

In [None]:
# Build one summary table (final `df2`) with, for every group, the mean
# "Metric", an estimate of it ("MetricEstimate") and their difference
# ("MetricDiff"). NLNSO rows are grouped by outer fold, NNLNSO rows by
# repetition. The names df1..df4 are reused for intermediate results, so the
# statement order below matters.
df1 = (performances_df.loc[performances_df['Partition']=='NLNSO']).copy()
df2 = (performances_df.loc[performances_df['Partition']=='NNLNSO']).copy()
# Selection score for NNLNSO rows: sum of the "MetricTest" and "MetricVal" columns.
df2.loc[:,"MetricValTest"] = df2.loc[:,"MetricTest"] + df2.loc[:,"MetricVal"]
grouped_nlnso = df1.groupby(["Partition", "Task", "Model", "Outer"])
grouped_nnlnso = df2.groupby(["Partition", "Task", "Model", "Repetition"])

# --- NLNSO: from here on df1 holds one row per (Partition, Task, Model, Outer).
df1 = grouped_nlnso.apply(lambda x: x.Metric.mean(), include_groups=False)
df1 = df1.reset_index().rename(columns={0: "Metric"})

# Estimate = group mean of "MetricVal". df3 comes from the same grouping and
# reset_index, so the column assignment below aligns row by row.
df3 = grouped_nlnso.apply(lambda x: x.MetricVal.mean(), include_groups=False)
df3 = df3.reset_index().rename(columns={0: "MetricEstimate"})
df1.loc[:,"MetricEstimate"] = df3.loc[:,"MetricEstimate"]
df1["MetricDiff"] = df1["MetricEstimate"]- df1["Metric"]

# "Inner" value of the row with the highest "MetricVal" in each group
# (idxmax returns an index label, used here to look the value up).
df4 = grouped_nlnso.apply(lambda x: x.Inner[x.MetricVal.idxmax()], include_groups=False)
df4 = df4.reset_index().rename(columns={0: "Inner"})
# NLNSO rows get a constant "Repetition" of 1 so that both tables share columns.
df1.loc[:,"Repetition"] = 1
df1.loc[:,"Inner"] = df4.loc[:,"Inner"]


# --- NNLNSO: from here on df2 holds one row per (Partition, Task, Model, Repetition).
df2 = grouped_nnlnso.apply(lambda x: x.Metric.mean(), include_groups=False)
df2 = df2.reset_index().rename(columns={0: "Metric"})

# "Outer" value of the row with the highest "MetricValTest" in each group.
df3 = grouped_nnlnso.apply(lambda x: x.Outer[x.MetricValTest.idxmax()], include_groups=False)
df3 = df3.reset_index().rename(columns={0: "Outer"})
df2.loc[:,'Outer'] = df3.loc[:,"Outer"]

# "Inner" value of that same highest-"MetricValTest" row.
df4 = grouped_nnlnso.apply(lambda x: x.Inner[x.MetricValTest.idxmax()], include_groups=False)
df4 = df4.reset_index().rename(columns={0: "Inner"})
df2.loc[:,"Inner"] = df4.loc[:,"Inner"]

# Estimate = group mean of "MetricTest".
df4 = grouped_nnlnso.apply(lambda x: x.MetricTest.mean(), include_groups=False)
df4 = df4.reset_index().rename(columns={0: "MetricEstimate"})
df2.loc[:,"MetricEstimate"] = df4.loc[:,"MetricEstimate"]
df2["MetricDiff"] = df2["MetricEstimate"]- df2["Metric"]

# Stack NLNSO and NNLNSO summaries; the original row indices are kept (not reset).
df2 = pd.concat([df1, df2])
del df3, df4

In [None]:
def _abs_metric_diff(frame, model_key, task_key, partition_key):
    """Return |MetricDiff| for the rows of one model / task / partition."""
    rows = (
        (frame["Model"] == model_key)
        & (frame["Task"] == task_key)
        & (frame["Partition"] == partition_key)
    )
    return np.abs(frame.loc[rows, "MetricDiff"].values)


# First table: mean and median absolute difference, NLNSO / NNLNSO.
for i in model:
    for k in task:
        abs_nlnso = _abs_metric_diff(df2, i, k, "NLNSO")
        abs_nnlnso = _abs_metric_diff(df2, i, k, "NNLNSO")
        print(
            f'{get_full_name(i):15} - {get_full_name(k):10}:',
            f'{abs_nlnso.mean():10.4f} / ',
            f'{abs_nnlnso.mean():10.4f} / ',
            f'{np.median(abs_nlnso):10.4f} / ',
            f'{np.median(abs_nnlnso):10.4f} ',
        )

print('\n\n')

# Second table: standard deviation and interquartile range of the absolute
# difference, NLNSO / NNLNSO.
for i in model:
    for k in task:
        abs_nlnso = _abs_metric_diff(df2, i, k, "NLNSO")
        abs_nnlnso = _abs_metric_diff(df2, i, k, "NNLNSO")
        print(
            f'{get_full_name(i):15} - {get_full_name(k):10}:',
            f'{abs_nlnso.std():10.4f} / ',
            f'{abs_nnlnso.std():10.4f} / ',
            f'{np.subtract(*np.percentile(abs_nlnso,[75, 25])):10.4f} / ',
            f'{np.subtract(*np.percentile(abs_nnlnso,[75, 25])):10.4f} ',
        )

## LOSO vs Full N-LOSO

In [None]:
# Number of subjects per task code; later cells use it to reshape the N-LOSO
# values into a (Nsubj, Nsubj - 1) array.
Nsubj_dict = {"alz": 88, "pds": 81, "bci": 106}

# Metric to load and the task / model / partition codes to compare
# (Leave-One-Subject-Out against its nested variant).
metric = "accuracy_weighted"
task = ["pds", "alz", "bci"]
model = ["shn", "egn", "dcn", "res"]
partition = ["LOSO", "NLOSO"]

# The extra `True` flag makes gather_metric_values return a second set of
# values; each set is converted to its own table.
performances, performancesval = gather_metric_values(
    metric, task, model, partition, True
)
performances_df = convert_performance_totable(performances)
performancesval_df = convert_performance_totable(performancesval)

In [None]:
# Print, for each task (EEGNet only), the interquartile range of the LOSO
# accuracies, the IQR of all N-LOSO accuracies, and the relative difference
# between the two in percent of the LOSO IQR.
# The unused statistics of the original cell (medians, extremes, means,
# standard deviations whose prints were commented out) have been dropped.
Nsubj_dict = {'alz': 88,'pds': 81,'bci': 106}
for k in task:
    Nsubj = Nsubj_dict[k]
    for i in ['egn']:
        in_task_model = (performances_df['Task'] == k) & (performances_df['Model'] == i)
        loso = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "LOSO"), "Metric"
        ].values
        nloso_pd = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "NLOSO")
        ]
        # The reshape doubles as a sanity check: it raises ValueError unless
        # there are exactly Nsubj * (Nsubj - 1) N-LOSO values.
        nloso = nloso_pd["Metric"].values.reshape(Nsubj, Nsubj - 1)

        # Percentiles without an axis are computed over the flattened array.
        loso_q75, loso_q25 = np.percentile(loso, [75, 25])
        nloso_q75, nloso_q25 = np.percentile(nloso, [75, 25])

        amd = loso_q75 - loso_q25        # IQR of the LOSO accuracies
        bmd = nloso_q75 - nloso_q25      # IQR of all N-LOSO accuracies
        cmd = 100*(amd-bmd)/amd          # relative difference, % of the LOSO IQR
        print(i, k, f'{amd:.3f}, {bmd:.3f}, {cmd:.3f}' )

In [None]:
# Figures: one per (task, model). LOSO accuracies (dots) are overlaid on the
# distribution of N-LOSO accuracies grouped by "Outer" (boxes).
Nsubj_dict = {'alz': 88,'pds': 81,'bci': 106}
# Only the colours that were effective in the original cell are kept (the
# first boxcolor / scattercolor assignments were immediately overwritten).
fliercolor   = 'gray'
linecolor    = '#137'
scattercolor = "#0072b2"
boxcolor     = '#e69f00'

font = 22
for k in task:
    Nsubj = Nsubj_dict[k]
    range_x = np.arange(Nsubj)
    # Task-dependent y range and vertical position of the panel letter.
    y_limits = (16, 94) if k == 'bci' else (-3, 102)
    letter_y = 98.5 if k == 'bci' else 106.5
    for n, i in enumerate(model):
        in_task_model = (performances_df['Task'] == k) & (performances_df['Model'] == i)
        loso = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "LOSO"), "Metric"
        ].values
        nloso_pd = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "NLOSO")
        ]
        # Same size check the original performed through an otherwise unused reshape.
        assert len(nloso_pd) == Nsubj * (Nsubj - 1), (
            f"expected {Nsubj * (Nsubj - 1)} NLOSO rows for {i}/{k}, got {len(nloso_pd)}"
        )

        fig, ax = plt.subplots(figsize=(6.574*3, 2.41*3))
        sns.scatterplot(
            x=range_x, y=loso, ax=ax, hue=np.zeros(Nsubj), legend=False,
            edgecolor = 'black', zorder=10, palette=[scattercolor])
        ax.collections[0].set_sizes([50])
        sns.boxplot(
            data=nloso_pd,
            x="Outer",
            y="Metric",
            ax=ax,
            fill=True,
            showfliers=False,
            linecolor = linecolor,
            flierprops = dict(
                marker='o',
                markerfacecolor=fliercolor,
                linestyle='none',
                markeredgecolor=fliercolor
            ),
            boxprops=dict(alpha=.8),
            # One colour for all boxes; passing `palette` without `hue` is
            # deprecated in seaborn 0.13.
            color=boxcolor,
            width = 0.625,
        )
        ax.set_yticks(list(range(0, 101, 10)))

        # Major ticks every 5 positions, shifted by -1; minor ticks at every position.
        ax.xaxis.set_major_locator(MultipleLocator(5, offset=-1))
        ax.xaxis.set_minor_locator(MultipleLocator(1))

        ax.tick_params(axis='both', which='major', labelsize=font-3)
        ax.set_title(
            f'Model: {get_full_name(i)} - Task: {get_full_name(k)}',
            fontsize = font+3,
            pad = 16
        )
        ax.set_xlabel('Subject ID', fontsize = font)
        ax.set_ylabel('Balanced Accuracy %', fontsize = font)
        ax.set_ylim(*y_limits)
        ax.xaxis.grid(True, linewidth=1.5)
        ax.yaxis.grid(True, linewidth=1.5)
        ax.grid(which='minor', alpha=0.3)
        ax.grid(which='major', alpha=0.8)
        for side in ['top','bottom','left','right']:
            ax.spines[side].set_linewidth(1.5)

        # Panel letter, placed above the axes in data coordinates.
        ax.text(0, letter_y, f'$({letters[n]})$',fontsize = font+3)
        file_name = f"Images/{folder}/LOSO_vs_FNLOSO_model_{get_full_name(i)}_tasks_{get_full_name(k)}.pdf"
        plt.savefig(file_name, bbox_inches='tight')
        plt.show()
        # This loop creates one large figure per (task, model); free each one.
        plt.close(fig)

In [None]:
# Figures: one per task, with the four models stacked vertically. LOSO
# accuracies (dots) are overlaid on the distribution of N-LOSO accuracies
# grouped by "Outer" (boxes).
Nsubj_dict = {'alz': 88,'pds': 81,'bci': 106}
# Only the colours that were effective in the original cell are kept (the
# first boxcolor / scattercolor assignments were immediately overwritten).
fliercolor   = 'gray'
linecolor    = '#137'
scattercolor = "#0072b2"
boxcolor     = '#e69f00'

font = 21
for k in task:
    Nsubj = Nsubj_dict[k]
    range_x = np.arange(Nsubj)
    # Task-dependent y range and vertical position of the panel letter.
    y_limits = (16, 94) if k == 'bci' else (-3, 102)
    letter_y = 98.5 if k == 'bci' else 106.5
    fig, ax = plt.subplots(4, 1, figsize=(6.574*3, 7.6457*3))
    for n, i in enumerate(model):
        panel = ax[n]
        in_task_model = (performances_df['Task'] == k) & (performances_df['Model'] == i)
        loso = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "LOSO"), "Metric"
        ].values
        nloso_pd = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "NLOSO")
        ]
        # Same size check the original performed through an otherwise unused reshape.
        assert len(nloso_pd) == Nsubj * (Nsubj - 1), (
            f"expected {Nsubj * (Nsubj - 1)} NLOSO rows for {i}/{k}, got {len(nloso_pd)}"
        )

        sns.scatterplot(
            x=range_x, y=loso, ax=panel, hue=np.zeros(Nsubj), legend=False,
            edgecolor = 'black', zorder=10, palette=[scattercolor])
        panel.collections[0].set_sizes([50])
        sns.boxplot(
            data=nloso_pd,
            x="Outer",
            y="Metric",
            ax=panel,
            fill=True,
            showfliers=False,
            linecolor = linecolor,
            flierprops = dict(
                marker='o',
                markerfacecolor=fliercolor,
                linestyle='none',
                markeredgecolor=fliercolor
            ),
            boxprops=dict(alpha=.8),
            # One colour for all boxes; passing `palette` without `hue` is
            # deprecated in seaborn 0.13.
            color=boxcolor,
            width = 0.625,
        )
        panel.set_yticks(list(range(0, 101, 10)))
        # Major ticks every 5 positions, shifted by -1; minor ticks at every position.
        panel.xaxis.set_major_locator(MultipleLocator(5, offset=-1))
        panel.xaxis.set_minor_locator(MultipleLocator(1))
        panel.tick_params(axis='both', which='major', labelsize=font-3)

        panel.set_title(
            f'Model: {get_full_name(i)} - Task: {get_full_name(k)}',
            fontsize = font+3,
            pad = 16
        )
        panel.set_ylabel('Balanced Accuracy %', fontsize = font)
        # Only the last (bottom) panel carries the x label.
        if n == 3:
            panel.set_xlabel('Subject ID', fontsize = font)
        else:
            panel.xaxis.label.set_visible(False)

        panel.set_ylim(*y_limits)

        panel.xaxis.grid(True, linewidth=1.5)
        panel.yaxis.grid(True, linewidth=1.5)
        panel.grid(which='minor', alpha=0.3)
        panel.grid(which='major', alpha=0.8)
        for side in ['top','bottom','left','right']:
            panel.spines[side].set_linewidth(1.5)

        # Panel letter, placed above the axes in data coordinates.
        panel.text(0, letter_y, f'$({letters[n]})$',fontsize = font+3)

    fig_title = 'Leave-One-Subject-Out (blue dots) vs. Nested-Leave-One-Subjects-Out (orange boxes)'
    fig.suptitle(fig_title, fontsize= font+5.5)
    plt.subplots_adjust(top=.935, hspace=0.28)
    file_name = f"Images/{folder}/LOSO_vs_FNLOSO_all_models_tasks_{get_full_name(k)}.pdf"
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()
    # One large figure per task; free it before building the next one.
    plt.close(fig)

In [None]:
# Figure with two panels summarising LOSO vs N-LOSO per task and model:
#   (A) per-row difference between the LOSO accuracy and the median N-LOSO accuracy,
#   (B) per-row interquartile range of the N-LOSO accuracies.
Nsubj_dict = {'pds': 81, 'alz': 88, 'bci': 106}
font=25
model_names = [get_full_name(l) for l in model]
# One (n_models, Nsubj) buffer per task, sized by looking each task up in
# Nsubj_dict instead of relying on the dict's insertion order matching `task`.
metric_diff = [np.zeros((len(model), Nsubj_dict[k])) for k in task]
metric_var = [np.zeros((len(model), Nsubj_dict[k])) for k in task]

diff_frames = []
iqr_frames = []
for n, k in enumerate(task):
    Nsubj = Nsubj_dict[k]
    for m, i in enumerate(model):
        in_task_model = (performances_df['Task'] == k) & (performances_df['Model'] == i)
        loso = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "LOSO"), "Metric"
        ].values
        nloso_pd = performances_df.loc[
            in_task_model & (performances_df['Partition'] == "NLOSO")
        ]
        # One row of Nsubj - 1 values per LOSO entry.
        # NOTE(review): assumes the NLOSO rows are stored in consecutive
        # blocks of Nsubj - 1 values, in the same order as `loso` - confirm.
        nloso = nloso_pd["Metric"].values.reshape(Nsubj, Nsubj - 1)
        nloso_med = np.median(nloso, 1)
        nloso_var = np.subtract(*np.percentile(nloso, [75, 25], axis=1))
        metric_diff[n][m] = loso - nloso_med
        metric_var[n][m] = nloso_var

    # Wide tables for this task: one column per model plus the task name.
    task_diff = pd.DataFrame(metric_diff[n].T, columns=model_names)
    task_diff['Task'] = get_full_name(k)
    task_iqr = pd.DataFrame(metric_var[n].T, columns=model_names)
    task_iqr['Task'] = get_full_name(k)
    diff_frames.append(task_diff)
    iqr_frames.append(task_iqr)

# Concatenate once (instead of growing a frame inside the loop) and reshape
# to long format: one row per (Task, Model, value).
nloso_tot = pd.melt(
    pd.concat(diff_frames),
    id_vars=['Task'],
    value_vars=model_names,
    var_name='Model',
    value_name='Metric'
)

nloso_var_tot = pd.melt(
    pd.concat(iqr_frames),
    id_vars=['Task'],
    value_vars=model_names,
    var_name='Model',
    value_name='Metric'
)

fig, ax = plt.subplots(1, 2, figsize=(19.72, 8.8))

# Panel (A): LOSO minus median N-LOSO.
sns.boxplot(
    data      = nloso_tot,
    x         = 'Task',
    y         = 'Metric',
    legend    = True,
    linewidth = 1.5,
    hue       = 'Model',
    ax        = ax[0],
    showfliers= False,
    boxprops=dict(alpha=.9),
    linecolor = '#137',
    palette   = ["#f0e442", "#0072b2", "#e69f00", "#009e73"]
)
ax[0].legend(fontsize = font-7, loc = "upper right")
ax[0].yaxis.set_major_locator(MultipleLocator(10))
ax[0].tick_params(axis='both', which='major', labelsize=font-5)
ax[0].set_xlabel('Task', fontsize = font-2)
ax[0].set_ylabel(r'Subject-wise [$\text{LOSO}_{\text{Acc}}-\text{med(N-LOSO}_{\text{Acc}})$]', fontsize = font-2)
ax[0].set_title('Accuracy difference between\nLOSO and median N-LOSO.\nDistribution across subjects', fontsize = font)
for side in ['top','bottom','left','right']:
    ax[0].spines[side].set_linewidth(1.5)
# Panel letter, positioned in data coordinates.
ax[0].text(-0.8, 96.5, '$(A)$',fontsize = font+3)

# Panel (B): N-LOSO interquartile range.
sns.boxplot(
    data      = nloso_var_tot,
    x         = 'Task',
    y         = 'Metric',
    legend    = True,
    linewidth = 1.5,
    hue       = 'Model',
    ax        = ax[1],
    showfliers= False,
    linecolor = '#137',
    palette   = ["#f0e442", "#0072b2", "#e69f00", "#009e73"]
)
ax[1].legend(fontsize = font-7, loc = "upper right")
ax[1].yaxis.set_major_locator(MultipleLocator(10))
ax[1].tick_params(axis='both', which='major', labelsize=font-5)
ax[1].set_xlabel('Task', fontsize = font-2)
ax[1].set_ylabel(r'Subject-wise $\text{IQR}_{\text{N-LOSO}}$', fontsize = font-2)
ax[1].set_title('N-LOSO interquartile range (IQR).\nDistribution across subjects', fontsize = font)
for side in ['top','bottom','left','right']:
    ax[1].spines[side].set_linewidth(1.5)
# Panel letter, positioned in data coordinates.
ax[1].text(-0.8, 106.5, '$(B)$',fontsize = font+3)
plt.savefig(f"Images/{folder}/LOSO_vs_FNLOSO_model_delta_and_iqr.pdf", bbox_inches='tight')
plt.show()