# Statistical Analysis on different Pipelines

This notebook implements all the figures shown in the original paper.

To run it, you must create a **custom environment for statannotations**,
whose declared dependency list is unfortunately broken.
We worked around this by creating a Python 3.11 environment,
installing seaborn 0.12, and finally installing statannotations with the
`--no-dependencies` option.

You also need to run the Python script RunKfoldAll.py to train all the models.
Alternatively, we provide a set of pickle files (highly compressed with zpaq) containing
the results obtained from every single model.

In [None]:
# Import Libraries
import os
import pickle
import pandas as pd
import glob
import numpy as np
import matplotlib.pyplot as plt
import scipy
import seaborn as sns
from statannotations.Annotator import Annotator
import math
import warnings

# Set Style and Warnings
# FutureWarning messages are silenced so they do not clutter the notebook output.
warnings.filterwarnings('ignore',category=FutureWarning)
# Use the seaborn "darkgrid" style and "colorblind" palette for the plots below.
sns.set_style("darkgrid")
sns.set_palette("colorblind")

# Set Path
# `current_folder` is the project directory that contains the result pickles.
# It defaults to the location used by the original authors and can be
# overridden, without editing the notebook, by setting the
# EEG_SSL_PROJECT_ROOT environment variable.
# `current_folder1` is the working directory; images are written below it.
sep = os.path.sep
current_folder1    = os.getcwd()
current_folder     = os.environ.get('EEG_SSL_PROJECT_ROOT', '/home/delpup/EEG_SSL_Project')

root_folder_name   = 'eegprepro'
images_folder_name = 'Images'

# Both folders end with a path separator because later cells build file
# names by plain string concatenation.
root_folder    = current_folder + sep + root_folder_name   + sep
images_folder  = current_folder1 + sep + images_folder_name + sep

In [None]:
def GetLearningRateString(model, task):
    """Return the learning rate of a model/task pair as a 6-digit string.

    The learning rate is looked up in a fixed table, multiplied by 1e6 and
    zero-padded to six characters (e.g. 5e-04 -> '000500'). The string is
    used by ``median_acc`` to build the glob pattern of the result files.

    Args:
        model (str): full model name ('eegnet', 'shallownet', 'deepconvnet',
            'fbcnet') or its 3-letter code ('egn', 'shn', 'dcn', 'fbc').
        task (str): full task name ('eyes', 'alzheimer', 'motorimagery',
            'parkinson', 'sleep', 'psychosis') or its 3-letter code
            ('eye', 'alz', 'mmi', 'pds', 'slp', 'fep').

    Returns:
        str: learning rate times 1e6, zero-padded to 6 digits.

    Raises:
        KeyError: if the model or the task is not in the table.
    """
    model_conversion_dict = {
        'egn': 'eegnet',
        'shn': 'shallownet',
        'dcn': 'deepconvnet',
        'fbc': 'fbcnet'
    }
    task_conversion_dict = {
        'eye': 'eyes',
        'alz': 'alzheimer',
        'mmi': 'motorimagery',
        'pds': 'parkinson',
        'slp': 'sleep',
        'fep': 'psychosis'
    }
    lr_dict = {
        'eegnet': {
            'eyes': 5e-04,
            'parkinson': 1e-04,
            'alzheimer': 7.5e-04,
            'motorimagery': 1e-03,
            'sleep': 1e-03,
            'psychosis': 1e-04
        },
        'shallownet': {
            'eyes': 1e-03,
            'parkinson': 2.5e-04, #2.5e-05
            'alzheimer': 5e-05,
            'motorimagery': 7.5e-04,
            'sleep': 5e-05,
            'psychosis': 7.5e-05
        },
        'deepconvnet': {
            'eyes': 7.5e-04,
            'parkinson': 2.5e-04,
            'alzheimer': 7.5e-04,
            'motorimagery': 7.5e-04,
            'sleep': 2.5e-04,
            'psychosis': 1e-03
        },
        'fbcnet': {
            'eyes': 7.5e-04,
            'parkinson': 2.5e-04,
            'alzheimer': 7.5e-05,
            'motorimagery': 1e-3,
            'sleep': 1e-04,
            'psychosis': 1e-05
        }
    }

    # Only 3-letter inputs are treated as codes; anything else is assumed to
    # already be a full name.
    model_name = model_conversion_dict.get(model) if len(model) == 3 else model
    task_name = task_conversion_dict.get(task) if len(task) == 3 else task

    if model_name not in lr_dict:
        raise KeyError('Unknown model: ' + repr(model))
    if task_name not in lr_dict[model_name]:
        raise KeyError('Unknown task: ' + repr(task))

    lr = lr_dict[model_name][task_name]
    # round() instead of int(): int() truncates, so a product such as
    # 749.9999999 caused by floating-point error would become 749.
    return str(round(lr * 1e6)).zfill(6)

In [None]:
# Hyper-parameters
# task_folder[i] is the folder prefix and task_names[i] the 3-letter code of
# the same task, so the two lists must stay aligned.
task_folder = ['Eoec','MI','PD','Alz','Sleep','FEP']
task_names  = ['eye','mmi','pds','alz','slp','fep']
models      = ['egn','shn','dcn','fbc']
pipelines   = ['raw','flt','ica','isr']
srate       = [125,250]
verbose     = False

# out_folder * in_folder is the number of result files expected for each
# task/model/pipeline combination; pth is the significance threshold.
out_folder = 10
in_folder = 5
pth = 0.05

# Set Conversion Dictionary for Plots
# Maps the short codes used in file names (and the metric keys) to the
# labels shown in the figures.
convert_dict = {'eye': 'Eye',
                'mmi': 'MMI',
                'pds': 'Parkinson',
                'alz': 'Alzheimer',
                'slp': 'Sleep',
                'fep': 'FEP',
                'egn': 'EEGNet',
                'shn': 'ShallowNet',
                'dcn': 'DeepConvNet',
                'fbc': 'FBCNet',
                'raw': 'Raw',
                'flt': 'Filt',
                'ica': 'ICA',
                'isr': 'ICA+ASR',
                'accuracy_unbalanced': 'Unbalanced Accuracy',
                'accuracy_weighted'  : 'Balanced Accuracy',
                'f1score_weighted'   : 'F1-score Weighted',
                'precision_weighted' : 'Precision Weighted',
                'recall_weighted'    : 'Recall Weighted',
                # 'cohen_kappa' is offered as a metric in the analysis cells,
                # so it needs a label too.
                'cohen_kappa'        : "Cohen's Kappa"
               }

In [None]:
def median_acc(root_folder, numbers, task_folder, task_names, pipelines, models, srate, metric, out_folder, in_folder, pth=0.05, verbose=False):
    """Load one metric for every result file of a task/model pair.

    For each pipeline the function globs the result pickles stored in
    ``root_folder + task_folder[task] + 'Classification/Results/'``, reads
    ``metric`` from each file and stores it in one column of the output.

    Args:
        root_folder (str): root data folder, ending with a path separator.
        numbers (sequence of int): ``numbers[0]`` indexes ``task_folder`` /
            ``task_names`` and ``numbers[1]`` indexes ``models``.
        task_folder (list of str): folder prefix of every task.
        task_names (list of str): 3-letter code of every task.
        pipelines (list of str): pipeline codes, one output column each.
        models (list of str): model codes.
        srate: sampling rate, converted to a string in the file pattern.
        metric (str): key read from every loaded pickle.
        out_folder (int), in_folder (int): their product is the number of
            rows allocated (one per expected result file).
        pth (float): p-value threshold of the Shapiro-Wilk check.
        verbose (bool): if True, print the patterns, the number of files
            found and the result of the Shapiro-Wilk check.

    Returns:
        tuple: ``(acc, med_acc)`` where ``acc`` has shape
        ``(out_folder*in_folder, len(pipelines))`` and ``med_acc`` is the
        column-wise median. Rows without a file stay at zero.
    """
    task_code = task_names[numbers[0]]
    model_code = models[numbers[1]]
    root_path = root_folder + task_folder[numbers[0]] + 'Classification/Results/'

    # The file-name fragments and the learning rate do not depend on the
    # pipeline, so they are computed once.
    string_f1 = task_code + '_'
    string_f2 = '_' + str(srate) +'_' + model_code
    lr = GetLearningRateString(model_code, task_code)

    acc = np.zeros((out_folder*in_folder,len(pipelines)))
    for j in range(len(pipelines)):
        path = root_path + string_f1 + pipelines[j] + string_f2 + '_' + '*' + lr + '*_061_*.pickle'
        # Files are ordered numerically by the 5th and 6th '_'-separated
        # fields of their base name (presumably the outer and inner fold
        # indices - TODO confirm), so row k means the same for every pipeline.
        f = sorted(
            glob.glob(path),
            key = lambda x: (int(x.split(os.sep)[-1].split('_')[4]),
                             int(x.split(os.sep)[-1].split('_')[5])
                            )
        )
        if verbose:
            print(path)
            print(len(f))

        if len(f)==0:
            if verbose:
                print('Empty files @:' + path)
            continue

        for k in range(len(f)):
            # NOTE: pickle can execute arbitrary code on load; only open
            # result files coming from a trusted source.
            with open(f[k], 'rb') as file:
                a = pickle.load(file)
            acc[k,j] = a[metric]

    if verbose:
        print('----Saphiro-Wilk tests for model: ' + model_code + ', task: ' + task_code + ' -----')
        for j in range(len(pipelines)):
            # Shapiro-Wilk tests the null hypothesis that the sample is
            # normally distributed: a p-value below the threshold rejects it.
            if scipy.stats.shapiro(acc[:,j])[1]<pth:
                print('For pipeline: ' + pipelines[j] + ' | the distribution of ' + metric + ' is not normal')
        print('-------------------------------------------')
        print('-------------------------------------------')
    med_acc = np.median(acc,0)

    return acc, med_acc

In [None]:
def get_TensorMetrics(root_folder, margin_type, convert_dict, task_folder, task_names, models, pipelines, 
                      srate, metric, out_folder, in_folder, pth, verbose):
    """Collect the metric of every task/model/pipeline combination.

    Args:
        root_folder, task_folder, task_names, models, pipelines, srate,
        metric, out_folder, in_folder, pth, verbose: forwarded unchanged to
            ``median_acc``.
        margin_type (str): 'Task' puts tasks on the first axis of the
            outputs and models on the second; 'Model' does the opposite.
        convert_dict (dict): maps pipeline codes to the labels used in the
            annotation pairs.

    Returns:
        tuple: ``(acc_ten, med_acc, pairs, n)`` where
            * ``acc_ten`` has shape ``(n, m, out_folder*in_folder, len(pipelines))``;
            * ``med_acc`` has shape ``(n, m, len(pipelines))``;
            * ``pairs`` lists, for every name on the second axis, all pipeline
              pairs as ``[(name, label_a), (name, label_b)]``;
            * ``n`` is the length of the first axis.

    Raises:
        ValueError: if ``margin_type`` is neither 'Task' nor 'Model'.
    """
    margins = ['Task','Model']

    if margin_type == margins[0]:
        outer_names, inner_names = task_names, models
        axis_labels = ('Tasks: ', '| Models: ')
    elif margin_type == margins[1]:
        outer_names, inner_names = models, task_names
        axis_labels = ('Models: ', '| Tasks: ')
    else:
        # Previously this fell through and failed later with an unbound-name error.
        raise ValueError("margin_type must be 'Task' or 'Model', got " + repr(margin_type))

    if verbose:
        print('Margin over: ' + margin_type)

    n = len(outer_names)
    m = len(inner_names)

    # Every unordered pipeline pair, repeated for each name on the second axis.
    pairs = []
    for name in inner_names:
        for i in range(len(pipelines)):
            for k in range(i+1, len(pipelines)):
                pairs.append([(name,convert_dict[pipelines[i]]),(name,convert_dict[pipelines[k]])])

    med_acc = np.zeros((n,m,len(pipelines)))
    acc_ten = np.zeros((n,m,out_folder*in_folder,len(pipelines)))
    if verbose:
        shape_ten = np.shape(acc_ten)
        print(axis_labels[0], shape_ten[0], axis_labels[1],shape_ten[1],'| Folds: ', shape_ten[2], '| Pipelines: ',shape_ten[3])

    for u in range(n):
        for i in range(m):
            # median_acc expects [task index, model index].
            numbers = [u,i] if margin_type == margins[0] else [i,u]
            acc_ten[u,i,:,:], med_acc[u,i,:] = median_acc(
                root_folder, numbers, task_folder, task_names, pipelines, models,
                srate, metric, out_folder, in_folder, pth, verbose
            )
    return acc_ten, med_acc, pairs, n

In [None]:
def friedman_test(med_acc, task_or_model_name, pipelines, metric, verbose=False):
    """Run the Friedman test and the Nemenyi rank comparison across pipelines.

    Args:
        med_acc (array-like): 2-D array with one row per block (e.g. task or
            model) and one column per pipeline.
        task_or_model_name (str): stored under 'name' in the result.
        pipelines (list of str): pipeline names, one per column of ``med_acc``.
        metric (str): stored under 'metric' in the result.
        verbose (bool): if True, print the intermediate quantities and the
            outcome of every pairwise comparison.

    Returns:
        dict: with keys
            'name', 'metric': the inputs above;
            'es': result of ``scipy.stats.friedmanchisquare``;
            'rank_matrix': per-row ranks (rank 1 = highest value, ties get
                the average rank);
            'avg_rank': mean rank of every pipeline;
            'chi2': Friedman statistic computed from the average ranks;
            'F': ``(N-1)*chi2 / (N*(k-1) - chi2)`` (Iman-Davenport form);
            'CD': Nemenyi critical difference at alpha = 0.05;
            'diff': absolute average-rank difference of every pipeline pair,
                in the order (0,1), (0,2), ..., (k-2,k-1).

    Raises:
        ValueError: if the number of columns differs from ``len(pipelines)``
            or if the number of pipelines is outside 2..10 (the range covered
            by the critical-value table).
    """
    med_acc = np.asarray(med_acc)
    N = np.shape(med_acc)[0]
    k = len(pipelines)

    if med_acc.ndim != 2 or med_acc.shape[1] != k:
        raise ValueError('med_acc must have one column per pipeline')

    # Nemenyi critical values q_0.05 for k = 2..10 compared methods.
    q_05_v = np.array([1.960, 2.343, 2.569, 2.728, 2.850, 2.949, 3.031, 3.102, 3.164])
    if not 2 <= k <= len(q_05_v) + 1:
        raise ValueError('The critical-value table covers 2 to 10 pipelines, got ' + str(k))
    q_05 = q_05_v[k-2]

    # One sample per pipeline column, for any number of pipelines.
    es = scipy.stats.friedmanchisquare(*med_acc.T)

    # rankdata ranks ascending, so (k+1) - rank gives rank 1 to the best
    # value. Tied entries keep their (fractional) average rank: casting to
    # int would truncate e.g. 2.5 to 2 and bias the average ranks.
    rank_matrix = (k+1) - scipy.stats.rankdata(med_acc,axis=1)

    avg_rank = np.mean(rank_matrix,0)
    chi2 = 12*N/(k*(k+1))*np.sum(avg_rank**2) - 3*N*(k+1)

    # eps keeps the denominator away from zero when chi2 == N*(k-1).
    F = (N-1)*chi2/(N*(k-1)-chi2+np.finfo(np.float32).eps)

    CD = q_05*np.sqrt(k*(k+1)/(6*N))

    if verbose:
        print(es)
        print('Rank matrix :')
        print(np.round(rank_matrix,3))
        print('Avg Rank: ', np.round(avg_rank,3))
        print('Chi2: ',np.round(chi2,3))
        print('CD: ',np.round(CD,3))

    diff_v = []
    for i in range(k):
        for j in range(i+1,k):
            diff = np.abs(avg_rank[i] - avg_rank[j])
            diff_v.append(diff)
            if verbose:
                # Two pipelines differ when their average ranks are at least CD apart.
                if diff >= CD:
                    print(pipelines[i] + ' vs ' + pipelines[j],': different, |Ri-Rj|= ',diff)
                else:
                    print(pipelines[i] + ' vs ' + pipelines[j],': no different, |Ri-Rj|= ',diff)

    friedmann_dict = {'name': task_or_model_name,
                      'metric': metric,
                      'F':F,
                      'chi2':chi2,
                      'avg_rank':avg_rank,
                      'rank_matrix':rank_matrix,
                      'CD':CD,
                      'diff':diff_v,
                      'es':es}

    return friedmann_dict

## Statistical test

In [None]:
# Metric read from every result file. Other keys that can be selected here:
#   'accuracy_unbalanced', 'f1score_weighted', 'precision_weighted',
#   'recall_weighted', 'cohen_kappa'
metric = 'accuracy_weighted'

# 'Model' margin: one Friedman test per model, computed across the tasks.
margins = ['Task','Model']
margin_type = margins[1]
test_pipes = 'Wilcoxon'
corr_method = 'holm'

# Get Accuracies and Median Accuracies
acc_ten, med_acc, pairs, n = get_TensorMetrics(
    root_folder=root_folder,
    margin_type=margin_type,
    convert_dict=convert_dict,
    task_folder=task_folder,
    task_names=task_names,
    models=models,
    pipelines=pipelines,
    srate=srate[0],
    metric=metric,
    out_folder=out_folder,
    in_folder=in_folder,
    pth=pth,
    verbose=verbose,
)

# Perform Friedman's Test
# The result dictionary is keyed by model code or task code depending on the margin.
if margin_type == margins[1]:
    f = {
        name: friedman_test(med_acc[idx], name, pipelines, metric)
        for idx, name in zip(range(n), models)
    }
elif margin_type == margins[0]:
    f = {
        name: friedman_test(med_acc[idx], name, pipelines, metric, verbose)
        for idx, name in zip(range(n), task_names)
    }
else:
    f = {}

if verbose:
    print(f)

In [None]:
# Display the dictionary of Friedman test results built in the previous cell.
f

## Bar-plot Visualisation

In [None]:
# 'multiple_plots' draws every panel in one shared figure; 'single_plot'
# draws one figure per panel.
plot_modality = 'single_plot' # 'multiple_plots' or 'single_plot'
# When True the figures are written as PDF files into images_folder.
save_img = True

In [None]:
def plot_task_or_model(
    cdf, feat, ax, u, test_pipes, corr_method, 
    task_or_model_names, font, save_img,
    linew, size, model_or_task_names, metric,
    margin_type, letters, convert_dict, verbose,
    pairs=None
):
    """Draw one annotated box plot comparing the pipelines.

    Args:
        cdf (pandas.DataFrame): wide table with one column per pipeline plus
            the grouping column ``feat``.
        feat (str): name of the grouping column; used as x variable and x label.
        ax (matplotlib Axes): axes to draw on.
        u (int): index of the panel, used to pick the title and the letter.
        test_pipes (str): statistical test passed to the annotator.
        corr_method (str): multiple-comparison correction passed to the annotator.
        task_or_model_names (list of str): codes of the x groups, converted
            to tick labels with ``convert_dict``.
        font (int): base font size.
        save_img (str or list): output path prefix; an empty value disables saving.
        linew (float): line width.
        size (float): marker size of the strip plot.
        model_or_task_names (list of str): codes of the panels; element ``u``
            gives the title and the output file name.
        metric (str): metric key, converted to the y label with ``convert_dict``.
        margin_type (str): prefix of the panel title.
        letters (list of str): panel letters.
        convert_dict (dict): code-to-label mapping.
        verbose (bool): verbosity flag passed to the annotator.
        pairs (list, optional): pairs to annotate, each as
            ``[(group, pipeline_a), (group, pipeline_b)]``. When omitted,
            every pipeline pair within every group of ``cdf`` is used.

    Returns:
        pandas.DataFrame: the long-format table that was plotted.
    """
    mdf = pd.melt(cdf, id_vars=[feat], var_name='pipeline')

    if pairs is None:
        # Build the pairs from the data itself rather than reading a global
        # `pairs` variable that may be stale or belong to another margin type.
        hue_levels = [col for col in cdf.columns if col != feat]
        pairs = [
            [(group, hue_levels[i]), (group, hue_levels[k])]
            for group in pd.unique(cdf[feat])
            for i in range(len(hue_levels))
            for k in range(i+1, len(hue_levels))
        ]

    sns.boxplot(x=feat, y= 'value', hue='pipeline', data=mdf, showfliers = False, ax=ax,linewidth=linew) 
    sns.stripplot(x=feat, y="value", data=mdf, legend = False, linewidth=linew,
                  hue='pipeline', dodge=True, ax=ax,size=size)

    annotator = Annotator(ax=ax, pairs=pairs, data=mdf, x=feat, hue='pipeline', y='value')
    annotator.configure(
        test=test_pipes,text_format='star',hide_non_significant=True, verbose=verbose,
        loc='inside', comparisons_correction = corr_method,line_width=linew,fontsize=font
    )

    annotator.apply_and_annotate()

    ax.set_xticklabels([convert_dict[i] for i in task_or_model_names],fontsize = font-4)
    ax.set_title(margin_type + ': ' + convert_dict[model_or_task_names[u]],fontsize = font+3)
    ax.set_xlabel(feat,fontsize = font)
    # The y range extends above the 15-100 tick range to leave room for the
    # significance annotations and the panel letter.
    ax.set_ylim(15,140)
    ax.set_ylabel(convert_dict[metric] + ' %',fontsize = font)
    ax.legend(fontsize = font, loc="lower left")
    ax.set_yticks(np.arange(15,105,5))
    ax.set_yticklabels(np.arange(15,105,5),fontsize = font-4)
    ax.text(4.95,135,'('+letters[u]+'-I)',fontsize = font+6)

    if save_img:
        # Save the figure that owns `ax` instead of relying on a global `fig`.
        ax.figure.savefig(save_img + model_or_task_names[u] +'.pdf', transparent=False, bbox_inches='tight')

    return mdf

In [None]:
# Box plots of the metric: one panel per model ('Model' margin) or per task
# ('Task' margin), with the pipelines compared inside every group.
letters = ['A','B','C','D','E','F']
dim_f = 20

# `feat` is the grouping column shown on the x axis, `group_names` its values
# and `panel_names` the entity each panel refers to (first axis of acc_ten).
if margin_type == margins[1]:
    feat, group_names, panel_names = margins[0], task_names, models
elif margin_type == margins[0]:
    feat, group_names, panel_names = margins[1], models, task_names
else:
    raise ValueError("margin_type must be 'Task' or 'Model', got " + repr(margin_type))

if plot_modality == 'multiple_plots':
    # IF MULTIPLE PLOT
    # One shared figure with two columns and as many rows as the panels
    # need (2x2 for four models, 3x2 for six tasks).
    fig, ax = plt.subplots(max(1, math.ceil(n / 2)),2,figsize=(dim_f,dim_f))
    ax = ax.flatten()
    flag = True
    font, linew, size = 14, 1, 4
    # Panels are not saved one by one; the whole figure is saved at the end.
    save_img_path = []

elif plot_modality == 'single_plot':
    #IF SINGLE PLOT
    # Every panel gets its own figure, created inside the loop below.
    ax = None
    flag = False
    font, linew, size = 34, 3, 10
    save_img_path = images_folder if save_img else []

else:
    raise ValueError("plot_modality must be 'multiple_plots' or 'single_plot', got " + repr(plot_modality))

pipeline_labels = [convert_dict[p] for p in pipelines]

for u in range(n):
    if verbose:
        print('------ ',convert_dict[panel_names[u]],' ------')

    # Wide table: one column per pipeline, values in percent, plus the
    # grouping column named after `feat`.
    cdf = pd.concat([
        pd.DataFrame(
            acc_ten[u,i,:,:]*100,
            columns=pipeline_labels
        ).assign(**{feat: group_names[i]})
        for i in range(len(group_names))
    ])

    if flag:
        axu = ax[u]
    else:
        fig, axu = plt.subplots(1,1,figsize=(dim_f,dim_f))

    mdf = plot_task_or_model(
        cdf, feat, axu, u, test_pipes,
        corr_method, group_names, font, save_img_path,
        linew, size, panel_names, metric, margin_type,
        letters, convert_dict, verbose
    )

    print('--------------------------------------')        

if save_img and plot_modality == 'multiple_plots':
    fig.savefig(images_folder + margin_type + '.pdf', transparent=False, bbox_inches='tight')

plt.show()
plt.clf()
plt.close()

### Boxplot (Task Visualisation)

In [None]:
# Metric read from every result file. Other keys that can be selected here:
#   'accuracy_unbalanced', 'f1score_weighted', 'precision_weighted',
#   'recall_weighted', 'cohen_kappa'
metric = 'accuracy_weighted'

# 'Task' margin: tasks on the first axis of the tensors, models on the second.
margins = ['Task','Model']
margin_type = margins[0]
test_pipes = 'Wilcoxon'
corr_method = 'holm'

acc_ten, med_acc, pairs, n = get_TensorMetrics(
    root_folder=root_folder,
    margin_type=margin_type,
    convert_dict=convert_dict,
    task_folder=task_folder,
    task_names=task_names,
    models=models,
    pipelines=pipelines,
    srate=srate[0],
    metric=metric,
    out_folder=out_folder,
    in_folder=in_folder,
    pth=pth,
    verbose=verbose,
)

In [None]:
# 'multiple_plots' draws every panel in one shared figure; 'single_plot'
# draws one figure per panel.
plot_modality = 'single_plot' # 'multiple_plots' or 'single_plot'
# When True the figures are written as PDF files into images_folder.
save_img = True

In [None]:
# Box plots per task: pipelines on the x axis, models compared within every
# pipeline. Requires the tensors computed with margin_type == 'Task'.
letters = ['A','B','C','D','E','F']
dim_f = 20

if margin_type != margins[0]:
    # The indexing below (acc_ten[u,:,:,i], task_names[u]) assumes tasks on
    # the first axis; any other layout would silently plot the wrong data.
    raise ValueError("This cell requires margin_type == 'Task', got " + repr(margin_type))

if plot_modality == 'multiple_plots':
    # IF MULTIPLE PLOT
    # One shared figure with two columns and as many rows as the panels
    # need (3x2 for six tasks).
    fig, ax = plt.subplots(max(1, math.ceil(n / 2)),2,figsize=(int(dim_f-5),dim_f))
    ax = ax.flatten()
    flag = True
    font, linew, size = 14, 1, 4
    # Panels are not saved one by one; the whole figure is saved at the end.
    save_img_path = []

elif plot_modality == 'single_plot':
    #IF SINGLE PLOT
    # Every panel gets its own figure, created inside the loop below.
    ax = None
    flag = False
    font, linew, size = 34, 3, 10
    save_img_path = images_folder if save_img else []

else:
    raise ValueError("plot_modality must be 'multiple_plots' or 'single_plot', got " + repr(plot_modality))

model_labels = [convert_dict[j] for j in models]
pipeline_labels = [convert_dict[j] for j in pipelines]

# Every unordered pair of models, repeated for each pipeline.
pairs_v1 = []
for j in range(len(pipelines)):
    for i in range(len(models)):
        for k in range(i+1, len(models)):
            pairs_v1.append([(pipeline_labels[j],model_labels[i]),
                             (pipeline_labels[j],model_labels[k])])

for u in range(n):
    if verbose:
        print('------ ',convert_dict[task_names[u]],' ------')        

    # Wide table: one column per model, values in percent, plus the
    # 'Pipelines' grouping column; acc_ten[u,:,:,i] is (models, runs).
    cdf = pd.concat([
        pd.DataFrame(
            (acc_ten[u,:,:,i].T)*100,
            columns=model_labels
        ).assign(Pipelines=pipeline_labels[i])
        for i in range(len(pipelines))
    ])
    mdf = pd.melt(cdf, id_vars=['Pipelines'], var_name='Models')  

    if flag:
        axu = ax[u]
    else:
        fig, axu = plt.subplots(1,1,figsize=(dim_f,int(dim_f-5)))

    sns.boxplot(x='Pipelines', y= 'value', hue='Models', data=mdf, showfliers = False, ax=axu,linewidth=linew) 
    sns.stripplot(
        x='Pipelines', y="value", data=mdf,
        legend = False, linewidth=linew,
        hue='Models', dodge=True, ax=axu, size=size
    )

    annotator = Annotator(
        ax=axu, pairs=pairs_v1, data=mdf,
        x='Pipelines', hue='Models', y='value'
    )
    annotator.configure(
        test=test_pipes,text_format='star',hide_non_significant=True,
        verbose=verbose, loc='inside', comparisons_correction=corr_method,
        line_width=linew,fontsize=font
    )
    annotator.apply_and_annotate()

    axu.set_xticklabels(pipeline_labels,fontsize = font-4)
    axu.set_title(margin_type + ': ' + convert_dict[task_names[u]],fontsize = font+3)
    axu.set_xlabel('Pipelines',fontsize = font)
    # The y range extends above the 15-100 tick range to leave room for the
    # significance annotations and the panel letter.
    axu.set_ylim(15,140)

    print('--------------------------------------')        
    axu.set_ylabel(convert_dict[metric] + ' %',fontsize = font)
    # The first two panels place the legend at the bottom, the others at the top.
    if u==0 or u==1:
        axu.legend(fontsize = font, loc="lower left")
    else:
        axu.legend(fontsize = font, loc="upper left")
    axu.set_yticks(np.arange(15,105,5))
    axu.set_yticklabels(np.arange(15,105,5),fontsize = font-4)
    axu.text(3.13,135,'('+letters[u]+')',fontsize = font+6)

    if save_img_path:
        fig.savefig(save_img_path + task_names[u] +'.pdf', transparent=False, bbox_inches='tight')

if save_img and plot_modality == 'multiple_plots':
    fig.savefig(images_folder + margin_type + '.pdf', transparent=False, bbox_inches='tight')

plt.show()
plt.clf()
plt.close()

## CD Plot

In [None]:
#taken from https://gist.github.com/janezd and modified
def compute_CD(avranks, n, alpha=0.05, test="nemenyi"):
    """
    Returns critical difference for Nemenyi or Bonferroni-Dunn test
    according to given alpha (either alpha=0.05 or alpha=0.1) for average
    ranks and number of tested datasets N. Test can be either "nemenyi" for
    Nemenyi two tailed test or "bonferroni-dunn" for Bonferroni-Dunn test.

    The value returned is ``q * sqrt(k * (k + 1) / (6 * n))`` where ``k`` is
    the number of methods and ``q`` the tabulated critical value.

    Args:
        avranks (int or list): list of average ranks or the number of methods
        n (int): number of data sets
        alpha (float): alpha level; must be either 0.05 or 0.1
        test (str): "nemenyi" or "bonferroni-dunn"

    Returns:
        float: the critical difference.

    Raises:
        ValueError: if the (test, alpha) combination is not tabulated, or if
            the number of methods is below 2 or beyond the table (20 for
            "nemenyi", 10 for "bonferroni-dunn").
    """
    k = avranks if isinstance(avranks, int) else len(avranks)
    # Critical values indexed by the number of methods; positions 0 and 1
    # are placeholders because at least two methods are needed.
    d = {("nemenyi", 0.05): [0, 0, 1.959964, 2.343701, 2.569032, 2.727774,
                               2.849705, 2.94832, 3.030879, 3.101730, 3.163684,
                               3.218654, 3.268004, 3.312739, 3.353618, 3.39123,
                               3.426041, 3.458425, 3.488685, 3.517073,
                               3.543799],
         ("nemenyi", 0.1): [0, 0, 1.644854, 2.052293, 2.291341, 2.459516,
                              2.588521, 2.692732, 2.779884, 2.854606, 2.919889,
                              2.977768, 3.029694, 3.076733, 3.119693, 3.159199,
                              3.195743, 3.229723, 3.261461, 3.291224, 3.319233],
         ("bonferroni-dunn", 0.05): [0, 0, 1.960, 2.241, 2.394, 2.498, 2.576,
                                       2.638, 2.690, 2.724, 2.773],
         ("bonferroni-dunn", 0.1): [0, 0, 1.645, 1.960, 2.128, 2.241, 2.326,
                                      2.394, 2.450, 2.498, 2.539]}
    if (test, alpha) not in d:
        raise ValueError(
            "Unsupported test/alpha combination: " + repr((test, alpha))
            + "; test must be 'nemenyi' or 'bonferroni-dunn' and alpha 0.05 or 0.1"
        )
    q = d[(test, alpha)]
    # Reject k < 2 explicitly: the placeholder zeros would otherwise yield
    # CD = 0 and make every pair of methods look significantly different.
    if not 2 <= k < len(q):
        raise ValueError(
            "Number of methods must be between 2 and " + str(len(q) - 1)
            + " for test " + repr(test) + ", got " + str(k)
        )
    cd = q[k] * (k * (k + 1) / (6.0 * n)) ** 0.5
    return cd


In [None]:
#taken from https://gist.github.com/janezd and modified
def graph_ranks(avranks, names, cd=None, custom_string=None, cdmethod=None, letter=None, lowv=None, highv=None,
                width=6, textspace=1, reverse=False, filename=None, **kwargs):
    """
    Draws a CD graph, which is used to display  the differences in methods'
    performance. See Janez Demsar, Statistical Comparisons of Classifiers over
    Multiple Data Sets, 7(Jan):1--30, 2006.
    Needs matplotlib to work.
    The image is ploted on `plt` imported using `import matplotlib.pyplot as plt`.
    Args:
        avranks (list of float): average ranks of methods.
        names (list of str): names of methods.
        cd (float): Critical difference used for statistically significance of
            difference between methods.
        custom_string (str, optional): text appended to the "CD" label as
            "CD | <custom_string>". If omitted, the label is just "CD".
        cdmethod (int, optional): the method that is compared with other methods
            If omitted, show pairwise comparison of methods
        letter (str, optional): panel letter, drawn as "(<letter>-II)" next to
            the CD bar. If omitted, no letter is drawn.
        lowv (int, optional): the lowest shown rank
        highv (int, optional): the highest shown rank
        width (int, optional): default width in inches (default: 6)
        textspace (int, optional): space on figure sides (in inches) for the
            method names (default: 1)
        reverse (bool, optional):  if set to `True`, the lowest rank is on the
            right (default: `False`)
        filename (str, optional): output path WITHOUT extension; the figure
            is saved as ``filename + '_CD.pdf'``. If not given, the function
            does not write a file.
        **kwargs: accepted for backward compatibility and ignored.
    """
    width = float(width)
    textspace = float(textspace)

    sums = avranks

    # Sort the methods by average rank, remembering the original position of
    # each so that the names can be reordered the same way.
    tempsort = sorted([(a, i) for i, a in enumerate(sums)], reverse=reverse)
    ssums = [rank for rank, _ in tempsort]
    sortidx = [idx for _, idx in tempsort]
    nnames = [names[x] for x in sortidx]

    if lowv is None:
        lowv = min(1, int(math.floor(min(ssums))))
    if highv is None:
        highv = max(len(avranks), int(math.ceil(max(ssums))))

    cline = 0.4
    k = len(sums)
    lines = None
    linesblank = 0
    scalewidth = width - 2 * textspace

    def rankpos(rank):
        # Horizontal position (in inches) of a rank on the axis.
        if not reverse:
            a = rank - lowv
        else:
            a = highv - rank
        return textspace + scalewidth / (highv - lowv) * a

    distanceh = 0.25

    if cd and cdmethod is None:
        # get pairs of non significant methods
        def get_lines(sums, hsd):
            # get all pairs
            lsums = len(sums)
            allpairs = [(i, j) for i in range(lsums) for j in range(i + 1, lsums)]
            # keep the pairs whose rank difference is not significant
            notSig = [(i, j) for i, j in allpairs
                      if abs(sums[i] - sums[j]) <= hsd]

            # keep only longest: drop every pair strictly contained in another
            def no_longer(ij_tuple, notSig):
                i, j = ij_tuple
                for i1, j1 in notSig:
                    if (i1 <= i and j1 > j) or (i1 < i and j1 >= j):
                        return False
                return True

            longest = [(i, j) for i, j in notSig if no_longer((i, j), notSig)]

            return longest

        lines = get_lines(ssums, cd)
        linesblank = 0.2 + 0.2 + (len(lines) - 1) * 0.1
        # add scale
        distanceh = 0.25
        cline += distanceh

    # calculate the height needed for the image
    minnotsignificant = max(2 * 0.2, linesblank)
    height = cline + ((k + 1) / 2) * 0.2 + minnotsignificant

    fig = plt.figure(figsize=(width, height))
    fig.set_facecolor('white')
    ax = fig.add_axes([0, 0, 1, 1])  # reverse y axis
    ax.set_axis_off()

    hf = 1. / height  # height factor
    wf = 1. / width

    # Upper left corner is (0,0).
    ax.plot([0, 1], [0, 1], c="w")
    ax.set_xlim(0, 1)
    ax.set_ylim(1, 0)

    def line(l, color='k', **kwargs):
        """
        Input is a list of pairs of points (in inches).
        """
        ax.plot([x * wf for x, _ in l], [y * hf for _, y in l], color=color, **kwargs)

    def text(x, y, s, *args, **kwargs):
        ax.text(wf * x, hf * y, s, *args, **kwargs)

    # Main rank axis.
    line([(textspace, cline), (width - textspace, cline)], linewidth=0.7)

    bigtick = 0.1
    smalltick = 0.05

    # Ticks every quarter rank, longer on integer ranks.
    for a in list(np.arange(lowv, highv, 0.25)) + [highv]:
        tick = bigtick if a == int(a) else smalltick
        line([(rankpos(a), cline - tick / 2),
              (rankpos(a), cline)],
             linewidth=0.7)

    # Integer rank labels, placed just above the long ticks.
    for a in range(lowv, highv + 1):
        text(rankpos(a), cline - bigtick / 2 - 0.05, str(a),
             ha="center", va="bottom")

    k = len(ssums)

    # First half of the sorted methods: names on the left side.
    for i in range(math.ceil(k / 2)):
        chei = cline + minnotsignificant + i * 0.2
        line([(rankpos(ssums[i]), cline),
              (rankpos(ssums[i]), chei),
              (textspace - 0.1, chei)],
             linewidth=0.7)
        text(textspace - 0.2, chei, nnames[i], ha="right", va="center")

    # Second half: names on the right side.
    for i in range(math.ceil(k / 2), k):
        chei = cline + minnotsignificant + (k - i - 1) * 0.2
        line([(rankpos(ssums[i]), cline),
              (rankpos(ssums[i]), chei),
              (textspace + scalewidth + 0.1, chei)],
             linewidth=0.7)
        text(textspace + scalewidth + 0.2, chei, nnames[i],
             ha="left", va="center")

    if cd and cdmethod is None:
        # upper scale
        if not reverse:
            begin, end = rankpos(lowv), rankpos(lowv + cd)
        else:
            begin, end = rankpos(highv), rankpos(highv - cd)

        line([(begin, distanceh), (end, distanceh)], linewidth=0.7)
        line([(begin, distanceh + bigtick / 2),
              (begin, distanceh - bigtick / 2)],
             linewidth=0.7)
        line([(end, distanceh + bigtick / 2),
              (end, distanceh - bigtick / 2)],
             linewidth=0.7)
        # custom_string and letter default to None, so they must not be
        # concatenated unconditionally.
        cd_label = "CD" if custom_string is None else "CD" + " | " + custom_string
        text((begin + end) / 2, distanceh - 0.05, cd_label,
             ha="center", va="bottom")
        if letter is not None:
            text(end+1.5,distanceh - 0.05,'('+letter+'-II)')

        # no-significance lines
        def draw_lines(lines, side=0.05, height=0.1):
            start = cline + 0.2
            for l, r in lines:
                line([(rankpos(ssums[l]) - side, start),
                      (rankpos(ssums[r]) + side, start)],
                     linewidth=2.5)
                start += height

        draw_lines(lines)

    elif cd:
        # Interval of +/- cd around the reference method.
        begin = rankpos(avranks[cdmethod] - cd)
        end = rankpos(avranks[cdmethod] + cd)
        line([(begin, cline), (end, cline)],
             linewidth=2.5)
        line([(begin, cline + bigtick / 2),
              (begin, cline - bigtick / 2)],
             linewidth=2.5)
        line([(end, cline + bigtick / 2),
              (end, cline - bigtick / 2)],
             linewidth=2.5)

    if filename:
        fig.tight_layout()
        plt.margins(0,0)
        fig.savefig(filename +'_CD.pdf', transparent=False, bbox_inches='tight',pad_inches=0.0)

In [None]:
# When True the CD diagrams are written as PDF files into images_folder.
save_img = True

# Metric read from every result file. Other keys that can be selected here:
#   'accuracy_unbalanced', 'f1score_weighted', 'precision_weighted',
#   'recall_weighted', 'cohen_kappa'
metric = 'accuracy_weighted'

posthoc_test = "nemenyi"
margins = ['Task','Model']
margin_type = margins[1]

acc_ten, med_acc, pairs, n = get_TensorMetrics(
    root_folder=root_folder,
    margin_type=margin_type,
    convert_dict=convert_dict,
    task_folder=task_folder,
    task_names=task_names,
    models=models,
    pipelines=pipelines,
    srate=srate[0],
    metric=metric,
    out_folder=out_folder,
    in_folder=in_folder,
    pth=pth,
    verbose=verbose,
)

# One Friedman test per model ('Model' margin) or per task ('Task' margin);
# the results are keyed by the corresponding code.
if margin_type == margins[1]:
    f = {
        name: friedman_test(med_acc[idx], name, pipelines, metric)
        for idx, name in zip(range(n), models)
    }
elif margin_type == margins[0]:
    f = {
        name: friedman_test(med_acc[idx], name, pipelines, metric)
        for idx, name in zip(range(n), task_names)
    }
else:
    f = {}
            

In [None]:
# Draw one critical-difference diagram per model ('Model' margin) or per task
# ('Task' margin). The number of data sets given to compute_CD is the size of
# the other dimension: the tasks for a model, the models for a task.
if margin_type in margins:
    if margin_type == margins[1]:
        panel_names, n_datasets = models, len(task_names)
    else:
        panel_names, n_datasets = task_names, len(models)

    pipeline_labels = [convert_dict[i] for i in pipelines]

    for u in range(n):
        name = panel_names[u]
        cd = compute_CD(
            f[name]['avg_rank'], n_datasets, alpha=pth, test=posthoc_test
        )
        # Both margins use the readable label from convert_dict in the title.
        s = margin_type + ': ' + convert_dict[name]
        filename = images_folder + name if save_img else None
        graph_ranks(
            f[name]['avg_rank'],
            pipeline_labels,
            cd, s, filename = filename, letter=letters[u]
        )

## Pie-Charts Visualisation

In [None]:
# Pie charts: for every model, the share of runs (over all tasks) in which
# each pipeline obtained the highest value of the metric.
# acc_ten is indexed here as [model, task, run, pipeline].
sns.set_style("white")
save_img = True
font=17

fig, ax = plt.subplots(2,2,figsize=(10,10))
fig.tight_layout()
axs = ax.flatten()
fig.suptitle('Best Performing Pipeline',fontsize=font+6,y=1.05)

n_runs = in_folder*out_folder
legend_labels = [convert_dict[j] for j in pipelines]

Piecounts = np.zeros((len(models),len(pipelines)))
for u, model_code in enumerate(models):
    # Count, run by run, which pipeline wins (ties go to the first one).
    for i in range(len(task_names)):
        for z in range(n_runs):
            best_pipeline = int(np.argmax(acc_ten[u,i,z,:]))
            Piecounts[u, best_pipeline] += 1

    ax = axs[u]
    patches, texts, autotexts = ax.pie(
        Piecounts[u,:], autopct='%1.1f%%',
        textprops={'size': font+2}, labeldistance=1.08
    )

    # Bottom-row panels carry their title below the pie, top-row ones above.
    title_y = -0.03 if u>=2 else 0.97
    ax.set_title('Model: ' + convert_dict[model_code], fontsize=font+4, y=title_y)

    # A single legend, attached to the first panel, serves all four pies.
    if u==0:
        ax.legend(legend_labels,fontsize=font,bbox_to_anchor=(0.81, 0.15))
    ax.axis('equal')

if save_img:
    fig.savefig(images_folder + 'Model_PIE.pdf', transparent=False, bbox_inches='tight')
