# Importing

In [1]:
from pathlib import Path
from scipy.stats.mstats import gmean
import pandas as pd
from scipy.stats import wilcoxon, ttest_ind
from scipy.stats import shapiro
from scipy.stats import friedmanchisquare
import numpy as np

# Study design 1

Althought I am comparing more than two classifiers (${k = 3}$, tree) on multiple datasets (tree, ${N=3}$), I cannot use ANOVA (data did not pass the Shapiro–Wilk test) and Friedman test (due to the assumption that the test statistic has a ${\chi}^2$ distribution, the p-value is only reliable if ${N > 10}$ and ${k > 5}$) (demvsar2006statistical). Thus, I am calculating the average statistic per dataset, then use these averages to compare the performance on using the Wilcoxon test.

@article{demvsar2006statistical,
  title={Statistical comparisons of classifiers over multiple data sets},
  author={Dem{\v{s}}ar, Janez},
  journal={The Journal of Machine Learning Research},
  volume={7},
  pages={1--30},
  year={2006},
  publisher={JMLR. org}
}

## Implementation

In [61]:
def caclculate_average_for_ds(dataset_name, path):
    TF_AL_ds_av = []
    TF_non_AL_ds_av = []
    DS_ad_av = []
    for directory in Path(path).iterdir():
        if str(directory).split('/')[-1] != 'figures':
            if str(directory).split('/')[-1].split('_')[1] in ['SF', 'SP1', 'SP2']:
                TF_AL = pd.read_csv(directory / 'TF_ML_AL.csv').iloc[:, 2:].mean()
                TF_non_AL = pd.read_csv(directory / 'TF_ML_non_AL.csv').iloc[:, 2:].mean()
                DS = pd.read_csv(directory / 'DeepSCAMs.csv').iloc[:, 2:].mean()
                TF_AL_ds_av.append(TF_AL.tolist())
                TF_non_AL_ds_av.append(TF_non_AL.tolist())
                DS_ad_av.append(DS.tolist())
    res = [np.array(it) for it in [TF_AL_ds_av, TF_non_AL_ds_av, DS_ad_av]]
    return res
            

## SF

In [86]:
TF_AL_ds_av_1, TF_non_AL_ds_av_1, DS_ad_av_1 = caclculate_average_for_ds('SF', 
                                                                         '/home/khali/scams/Results/Study_1')

In [87]:
pd.DataFrame(TF_non_AL_ds_av_1)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.815843,0.891271,0.966699,0.701538,0.757625,0.490036,0.70713,0.756586,0.806043,0.623214,0.551099,0.349265
1,0.690072,0.802087,0.914103,0.606154,0.303773,0.279071,0.771483,0.823425,0.875366,0.753455,0.329306,0.299038
2,0.80202,0.882638,0.963255,0.793846,0.785679,0.594243,0.736724,0.781723,0.826721,0.708163,0.721009,0.419166
3,0.772519,0.861243,0.949966,0.723077,0.753633,0.487937,0.751429,0.791077,0.830724,0.722441,0.78797,0.403838
4,0.76871,0.85759,0.946471,0.547692,0.678962,0.293666,0.653881,0.701404,0.748927,0.382122,0.480951,0.205384
5,0.699413,0.808159,0.916906,0.589231,0.222841,0.331133,0.765796,0.818332,0.870869,0.743431,0.254626,0.291165
6,0.780155,0.866888,0.953621,0.772308,0.762342,0.558177,0.726259,0.772518,0.818778,0.704348,0.711964,0.416924
7,0.777735,0.864991,0.952246,0.732308,0.762903,0.513556,0.758147,0.797285,0.836424,0.734971,0.796566,0.429708
8,0.685683,0.797723,0.909763,0.58,0.212563,0.272264,0.730746,0.789745,0.848744,0.750545,0.26273,0.244789


In [88]:
pd.DataFrame(TF_AL_ds_av_1).iloc[::3, :]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.87572,0.935436,0.994723,0.818462,0.829219,0.658837,0.805077,0.84476,0.884447,0.744388,0.63595,0.486215
3,0.852505,0.918786,0.985928,0.833846,0.83977,0.680352,0.818928,0.852702,0.886522,0.799213,0.840355,0.575861
6,0.867392,0.928558,0.989766,0.864615,0.86245,0.737259,0.796456,0.836033,0.875674,0.775959,0.780949,0.552618


In [89]:
pd.DataFrame(TF_AL_ds_av_1).iloc[1::3, :]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
1,0.807201,0.887666,0.969823,0.818462,0.788938,0.660542,0.866674,0.90405,0.941608,0.852727,0.727775,0.629982
4,0.844546,0.915228,0.987187,0.746154,0.78082,0.55226,0.761145,0.80104,0.841286,0.63831,0.574746,0.371246
7,0.845926,0.913947,0.98431,0.836923,0.841353,0.686125,0.822001,0.855345,0.888769,0.798428,0.841772,0.572258


In [90]:
pd.DataFrame(TF_AL_ds_av_1).iloc[2::3, :]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
2,0.871232,0.931689,0.992655,0.866154,0.863792,0.736797,0.81228,0.850031,0.887859,0.789031,0.795456,0.579533
5,0.796125,0.881214,0.968144,0.821538,0.787553,0.666091,0.856391,0.895481,0.934808,0.848175,0.718577,0.617422
8,0.844565,0.913188,0.983838,0.852308,0.830512,0.719846,0.817781,0.864579,0.91262,0.829818,0.679905,0.562314


### As we can see hereafter with the SF dataset, although all values for AL TF are higher than for non-AL TF and DeepSCAMs, with this study design, the differences are not statistically significant. We have only 3 samples, thus it is not possible to achieve significant difference.  

In [91]:
TF_AL_ds_av_1.shape[1]

12

In [92]:
for dim in range(TF_AL_ds_av_1.shape[1]):
    _, p_al_non_al = wilcoxon(TF_AL_ds_av_1[:, dim], TF_non_AL_ds_av_1[:, dim])
    _, p_al_DS = wilcoxon(TF_AL_ds_av_1[:, dim], DS_ad_av_1[:, dim])
    print('AL vs DS:', p_al_DS)
    print('non AL vs. AL:', p_al_non_al,)
    

AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625
AL vs DS: 0.00390625
non AL vs. AL: 0.00390625


# Study design 2

In [93]:
def caclculate_p_each_study(path):
    TF_AL_TF_non_AL = []
    TF_AL_ds = []
    study_names = []
    TF_AL_codes = []
    columns = []
    for directory in Path(path).iterdir():
        if str(directory).split('/')[-1].split('_')[1] in ['SF', 'SP1', 'SP2']:
            study_names.append(str(directory).split('/')[-1])
            TF_AL = pd.read_csv(directory / 'TF_ML_AL.csv').iloc[:, 2:]
            TF_non_AL = pd.read_csv(directory / 'TF_ML_non_AL.csv').iloc[:, 2:]
            DS = pd.read_csv(directory / 'DeepSCAMs.csv').iloc[:, 2:]
            run_results_1 = []
            run_results_2 = []
            code_for_TF_AL = []
            for col in TF_AL.columns:
                _, p_v_al_non_al = wilcoxon(TF_AL[col], TF_non_AL[col])
                run_results_1.append(p_v_al_non_al*12)
                _, p_v_al_ds = wilcoxon(TF_AL[col], DS[col])
                run_results_2.append(p_v_al_ds*12)
                if p_v_al_non_al*12 < 0.05 and p_v_al_ds*12 < 0.05 and TF_non_AL[col].mean() < TF_AL[col].mean() > DS[col].mean():
                    code_for_TF_AL.append('A')
                elif TF_non_AL[col].mean() < TF_AL[col].mean() > DS[col].mean() and ((p_v_al_non_al*12 > 0.05 or p_v_al_ds*12 > 0.05) or (p_v_al_non_al*12 > 0.05 and p_v_al_ds*12 > 0.05)):
                    code_for_TF_AL.append('B')
                else:
                    code_for_TF_AL.append('C')
                
            TF_AL_TF_non_AL.append(run_results_1)
            TF_AL_ds.append(run_results_2)
            TF_AL_codes.append(code_for_TF_AL)
    columns.append(TF_AL.columns.tolist())
    return TF_AL_TF_non_AL, TF_AL_ds, TF_AL_codes, study_names, columns

### Study 1

In [156]:
TF_AL_TF_non_AL_1, TF_AL_ds_1, TF_AL_codes_1, directories_1, columns = caclculate_p_each_study('/home/khali/scams/Results/Study_1')

In [159]:
TF_AL_codes_1 = pd.DataFrame(TF_AL_codes_1, index=[directories_1], columns=columns)

In [180]:
TF_AL_codes_1.to_csv('TF_AL_codes_1.csv')

In [160]:
TF_AL_codes_1.isin(['C']).sum(axis=0).sum()

0

In [161]:
TF_AL_codes_1.isin(['B']).sum(axis=0).sum()

13

In [162]:
TF_AL_codes_1.isin(['A']).sum(axis=0).sum()

95

In [163]:
13/(13+95)

0.12037037037037036

In [164]:
95/(13+95)

0.8796296296296297

In [174]:
TF_AL_TF_non_AL_1 = pd.DataFrame(TF_AL_TF_non_AL_1, index=[directories_1], columns=columns)

In [175]:
TF_AL_TF_non_AL_1.to_csv('TF_AL_TF_non_AL_1.csv')

In [177]:
TF_AL_ds_1 = pd.DataFrame(TF_AL_ds_1, index=[directories], columns=columns)

In [178]:
TF_AL_ds_1.to_csv('TF_AL_ds_1.csv')

## Study 2

In [165]:
TF_AL_TF_non_AL_2, TF_AL_ds_2, TF_AL_codes_2, directories_2, columns = caclculate_p_each_study('/home/khali/scams/Results/Study_2')



In [166]:
TF_AL_codes_2 = pd.DataFrame(TF_AL_codes_2, index=[directories_2], columns=columns)

In [167]:
TF_AL_codes_2

Unnamed: 0,AUC_LB_test,AUC_test,AUC_UB_test,Accuracy_test,F1_test,MCC_test,AUC_LB_validation,AUC_validation,AUC_UB_validation,Accuracy_validation,F1_validation,MCC_validation
ADASYN_SF_TTS,A,A,A,A,A,A,A,A,A,A,A,A
SMOTE_SP2_TTS,A,A,A,A,A,A,A,A,A,A,A,A
IHT_SF_TTS,B,B,B,A,B,A,A,A,A,A,A,A
CNN_SP2_TTS,A,A,A,A,A,A,A,A,A,A,A,A
CNN_SF_TTS,B,A,A,A,B,A,A,A,A,A,A,A
SMOTE_SF_TTS,A,A,A,A,A,A,A,A,A,A,A,A
ADASYN_SP2_TTS,A,A,A,A,A,A,A,A,A,A,A,A
IHT_SP2_TTS,B,B,B,B,B,B,A,A,A,A,A,A


In [168]:
TF_AL_codes_2.isin(['C']).sum(axis=0).sum()

0

In [169]:
TF_AL_codes_2.isin(['B']).sum(axis=0).sum()

12

In [170]:
TF_AL_codes_2.isin(['A']).sum(axis=0).sum()

84

In [171]:
12/(12+84)

0.125

In [172]:
84/(12+84)

0.875