# RQ1. Comparing different classifiers

In [86]:
from scipy import stats
import pandas as pd
import scikit_posthocs as sp
import numpy as np
import pingouin as pg


# Load the RQ1 score table (the path is relative to the working directory).
df = pd.read_excel('RQ1_Statistics.xlsx')

# Pull each score column out as a plain Python list. The _p / _r / _f05
# suffixes mirror the -P / -R / -F0.5 column names in the spreadsheet.
rf_none_p, rf_none_r, rf_none_f05 = (
    df[column].values.tolist()
    for column in ('RF-None-P', 'RF-None-R', 'RF-None-F0.5')
)
rf_over_p, rf_over_r, rf_over_f05 = (
    df[column].values.tolist()
    for column in ('RF-Over-P', 'RF-Over-R', 'RF-Over-F0.5')
)
lgbm_none_p, lgbm_none_r, lgbm_none_f05 = (
    df[column].values.tolist()
    for column in ('LGBM-None-P', 'LGBM-None-R', 'LGBM-None-F0.5')
)
xgb_none_p, xgb_none_r, xgb_none_f05 = (
    df[column].values.tolist()
    for column in ('XGB-None-P', 'XGB-None-R', 'XGB-None-F0.5')
)

print('=== RQ1. Trace recommendation F0.5: RF_None, RF_Over, LightGBM_None, XGBoost_None')

# One entry per metric: (Friedman label, Nemenyi label, samples).
# Samples are always ordered RF-None, RF-Over, LGBM-None, XGB-None.
comparisons = (
    ('\nPrecision: ', '\nNemenyi prec:\n',
     [rf_none_p, rf_over_p, lgbm_none_p, xgb_none_p]),
    ('\nRecall: ', '\nNemenyi rec:\n',
     [rf_none_r, rf_over_r, lgbm_none_r, xgb_none_r]),
    ('\nF0.5: ', '\nNemenyi f05:\n',
     [rf_none_f05, rf_over_f05, lgbm_none_f05, xgb_none_f05]),
)

matrices = []
for friedman_label, nemenyi_label, samples in comparisons:
    # Friedman test across the four score lists of this metric.
    print(friedman_label, stats.friedmanchisquare(*samples))
    # The Nemenyi post-hoc receives the transposed matrix, so every classifier
    # becomes one column. The integer labels 0-3 in the printed result appear
    # to follow the sample order listed above.
    matrix = np.array(samples)
    matrices.append(matrix)
    print(nemenyi_label, sp.posthoc_nemenyi_friedman(matrix.T))

# Keep the per-metric matrices available under their original names.
prec, rec, f05 = matrices

=== RQ1. Trace recommendation F0.5: RF_None, RF_Over, LightGBM_None, XGBoost_None

Precision:  FriedmanchisquareResult(statistic=23.75999999999999, pvalue=2.803419988472061e-05)

Nemenyi prec:
           0         1         2         3
0  1.000000  0.001000  0.001000  0.160247
1  0.001000  1.000000  0.900000  0.160247
2  0.001000  0.900000  1.000000  0.160247
3  0.160247  0.160247  0.160247  1.000000

Recall:  FriedmanchisquareResult(statistic=23.159999999999997, pvalue=3.7397669510043164e-05)

Nemenyi rec:
           0         1         2         3
0  1.000000  0.005517  0.072567  0.001000
1  0.005517  1.000000  0.799047  0.507386
2  0.072567  0.799047  1.000000  0.109694
3  0.001000  0.507386  0.109694  1.000000

F0.5:  FriedmanchisquareResult(statistic=9.960000000000008, pvalue=0.018909228242395687)

Nemenyi f05:
           0        1         2         3
0  1.000000  0.90000  0.900000  0.109694
1  0.900000  1.00000  0.900000  0.046280
2  0.900000  0.90000  1.000000  0.028569
3  0.10

In [87]:
print('=== RQ1. Effect sizes for trace recommendation')

# Hedges' g for every unordered pair of classifiers, grouped by metric.
# Within a group the pairs are visited left to right (first vs. second,
# first vs. third, ...), matching the order of the printed rows.
for metric, by_model in (
    ('F05', (('RFNone', rf_none_f05), ('RFOver', rf_over_f05),
             ('LGBMNone', lgbm_none_f05), ('XGBNone', xgb_none_f05))),
    ('P', (('RFNone', rf_none_p), ('RFOver', rf_over_p),
           ('LGBMNone', lgbm_none_p), ('XGBNone', xgb_none_p))),
    ('R', (('RFNone', rf_none_r), ('RFOver', rf_over_r),
           ('LGBMNone', lgbm_none_r), ('XGBNone', xgb_none_r))),
):
    # The first row of each group carries a leading newline as a separator.
    label = '\n' + metric
    for position, (left_name, left_scores) in enumerate(by_model):
        for right_name, right_scores in by_model[position + 1:]:
            print(label, left_name, right_name,
                  pg.compute_effsize(left_scores, right_scores, eftype='hedges'))
            label = metric

=== RQ1. Effect sizes for trace recommendation

F05 RFNone RFOver -0.21865347723024303
F05 RFNone LGBMNone -0.034649687798952
F05 RFNone XGBNone -0.9443406731221664
F05 RFOver LGBMNone 0.18702106984411684
F05 RFOver XGBNone -1.0001629711765063
F05 LGBMNone XGBNone -0.9460606473994778

P RFNone RFOver 2.45991175650449
P RFNone LGBMNone 2.202842783474881
P RFNone XGBNone 0.9738707015832264
P RFOver LGBMNone 0.20278322758607656
P RFOver XGBNone -1.3221200164176383
P LGBMNone XGBNone -1.2678034483073184

R RFNone RFOver -1.748313763021317
R RFNone LGBMNone -1.4722147258832685
R RFNone XGBNone -1.7277978326478338
R RFOver LGBMNone 0.0032391919100355814
R RFOver XGBNone -0.40849570829745296
R LGBMNone XGBNone -0.3305571444842793


In [88]:
# Score lists for the 5050 configurations, taken from the table currently
# bound to `df`. The _p / _r / _f2 suffixes mirror the -P / -R / -F2 columns.
rf_5050_p, rf_5050_r, rf_5050_f2 = (
    df[column].values.tolist()
    for column in ('RF-5050-P', 'RF-5050-R', 'RF-5050-F2')
)
lgbm_5050_p, lgbm_5050_r, lgbm_5050_f2 = (
    df[column].values.tolist()
    for column in ('LGBM-5050-P', 'LGBM-5050-R', 'LGBM-5050-F2')
)
xgb_5050_p, xgb_5050_r, xgb_5050_f2 = (
    df[column].values.tolist()
    for column in ('XGB-5050-P', 'XGB-5050-R', 'XGB-5050-F2')
)

print('=== RQ1. Trace maintenance F2: RF_5050, LightGBM_5050, XGBoost_5050')

# One entry per metric: (Friedman label, Nemenyi label, samples).
# Samples are always ordered RF-5050, LGBM-5050, XGB-5050.
comparisons = (
    ('\nPrecision: ', '\nNemenyi Prec:\n', [rf_5050_p, lgbm_5050_p, xgb_5050_p]),
    ('\nRecall: ', '\nNemenyi Rec:\n', [rf_5050_r, lgbm_5050_r, xgb_5050_r]),
    ('\nF2: ', '\nNemenyi F2:\n', [rf_5050_f2, lgbm_5050_f2, xgb_5050_f2]),
)

matrices = []
for friedman_label, nemenyi_label, samples in comparisons:
    # Friedman test across the three score lists of this metric.
    print(friedman_label, stats.friedmanchisquare(*samples))
    # Transposed so that each classifier is one column for the Nemenyi
    # post-hoc. The integer labels 0-2 in the printed result appear to follow
    # the sample order listed above.
    matrix = np.array(samples)
    matrices.append(matrix)
    print(nemenyi_label, sp.posthoc_nemenyi_friedman(matrix.T))

# Keep the per-metric matrices under their original names. `prec` and `rec`
# are rebound here to the 5050 data.
prec, rec, f2 = matrices

=== RQ1. Trace maintenance F2: RF_5050, LightGBM_5050, XGBoost_5050

Precision:  FriedmanchisquareResult(statistic=9.800000000000011, pvalue=0.007446583070924297)

Nemenyi Prec:
           0         1         2
0  1.000000  0.004967  0.261172
1  0.004967  1.000000  0.261172
2  0.261172  0.261172  1.000000

Recall:  FriedmanchisquareResult(statistic=15.800000000000011, pvalue=0.0003707435404590862)

Nemenyi Rec:
          0         1         2
0  1.00000  0.001000  0.010210
1  0.00100  1.000000  0.631856
2  0.01021  0.631856  1.000000

F2:  FriedmanchisquareResult(statistic=12.200000000000017, pvalue=0.0022428677194857843)

Nemenyi F2:
           0        1         2
0  1.000000  0.01021  0.004967
1  0.010210  1.00000  0.900000
2  0.004967  0.90000  1.000000


In [89]:
print('=== RQ1. Effect sizes for trace maintenance')

# Effect sizes are printed in the same order as reported in the paper:
# one group per metric, and inside each group RF vs. LGBM, RF vs. XGB,
# LGBM vs. XGB.
for metric, by_model in (
    ('F2', (('RF5050', rf_5050_f2), ('LGBM5050', lgbm_5050_f2), ('XGB5050', xgb_5050_f2))),
    ('P', (('RF5050', rf_5050_p), ('LGBM5050', lgbm_5050_p), ('XGB5050', xgb_5050_p))),
    ('R', (('RF5050', rf_5050_r), ('LGBM5050', lgbm_5050_r), ('XGB5050', xgb_5050_r))),
):
    # The first row of each group carries a leading newline as a separator.
    label = '\n' + metric
    for position, (left_name, left_scores) in enumerate(by_model):
        for right_name, right_scores in by_model[position + 1:]:
            print(label, left_name, right_name,
                  pg.compute_effsize(left_scores, right_scores, eftype='hedges'))
            label = metric

=== RQ1. Effect sizes for trace maintenance

F2 RF5050 LGBM5050 -1.0823680797895063
F2 RF5050 XGB5050 -1.181151527425768
F2 LGBM5050 XGB5050 -0.06059539803900366

P RF5050 LGBM5050 0.6905754256212175
P RF5050 XGB5050 0.299143141776538
P LGBM5050 XGB5050 -0.42900984495490263

R RF5050 LGBM5050 -1.5758983592666571
R RF5050 XGB5050 -1.4896756526001602
R LGBM5050 XGB5050 0.08716807282496188


# RQ2. Do non-MDD features have an impact?

In [90]:
# Load the RQ2 score table (the path is relative to the working directory).
# NOTE: this rebinds `df` as well as the xgb_none_* and lgbm_5050_* lists that
# the RQ1 cells filled from RQ1_Statistics.xlsx.
df = pd.read_excel('RQ2_Statistics.xlsx')

xgb_none_p, xgb_none_r, xgb_none_f05 = (
    df[column].values.tolist()
    for column in ('XGB-None-P', 'XGB-None-R', 'XGB-None-F0.5')
)
lgbm_5050_p, lgbm_5050_r, lgbm_5050_f2 = (
    df[column].values.tolist()
    for column in ('LGBM-5050-P', 'LGBM-5050-R', 'LGBM-5050-F2')
)

# The "n" variants (XGBn / LGBMn) are presumably the runs restricted to
# non-MDD features, going by the banner printed below -- TODO confirm.
xgbn_none_p, xgbn_none_r, xgbn_none_f05 = (
    df[column].values.tolist()
    for column in ('XGBn-None-P', 'XGBn-None-R', 'XGBn-None-F0.5')
)
lgbmn_5050_p, lgbmn_5050_r, lgbmn_5050_f2 = (
    df[column].values.tolist()
    for column in ('LGBMn-5050-P', 'LGBMn-5050-R', 'LGBMn-5050-F2')
)

print('=== RQ2. All features vs. only non-MDD features \n')

# Wilcoxon signed-rank test per metric: (label, first sample, second sample,
# keyword options). Only the XGB recall row uses the normal approximation
# with Pratt zero handling.
# NOTE(review): presumably that pair contains zero differences, which the
# exact method cannot handle -- confirm against the data.
for label, all_features, n_variant, options in (
    ('XGB-None-P', xgb_none_p, xgbn_none_p, {'method': 'exact'}),
    ('XGB-None-R', xgb_none_r, xgbn_none_r, {'method': 'approx', 'zero_method': 'pratt'}),
    ('XGB-None-F05', xgb_none_f05, xgbn_none_f05, {'method': 'exact'}),
    ('LGBM-5050-P', lgbm_5050_p, lgbmn_5050_p, {'method': 'exact'}),
    ('LGBM-5050-R', lgbm_5050_r, lgbmn_5050_r, {'method': 'exact'}),
    ('LGBM-5050-F2', lgbm_5050_f2, lgbmn_5050_f2, {'method': 'exact'}),
):
    print(label, stats.wilcoxon(all_features, n_variant, **options))


=== RQ2. All features vs. only non-MDD features 

XGB-None-P WilcoxonResult(statistic=12.0, pvalue=0.130859375)
XGB-None-R WilcoxonResult(statistic=22.0, pvalue=0.6098340436734595)
XGB-None-F05 WilcoxonResult(statistic=17.0, pvalue=0.322265625)
LGBM-5050-P WilcoxonResult(statistic=18.0, pvalue=0.375)
LGBM-5050-R WilcoxonResult(statistic=27.0, pvalue=1.0)
LGBM-5050-F2 WilcoxonResult(statistic=21.0, pvalue=0.556640625)


In [91]:
print('=== RQ2. Effect size \n')

# Hedges' g between each score list and its "n" counterpart. A metric label
# starting with a newline opens a new group in the printed output.
for metric, model, all_features, n_variant in (
    ('P', 'XGB-None', xgb_none_p, xgbn_none_p),
    ('R', 'XGB-None', xgb_none_r, xgbn_none_r),
    ('F05', 'XGB-None', xgb_none_f05, xgbn_none_f05),
    ('\nP', 'LGBM-5050', lgbm_5050_p, lgbmn_5050_p),
    ('R', 'LGBM-5050', lgbm_5050_r, lgbmn_5050_r),
    ('F2', 'LGBM-5050', lgbm_5050_f2, lgbmn_5050_f2),
):
    print(metric, model, pg.compute_effsize(all_features, n_variant, eftype='hedges'))

=== RQ2. Effect size 

P XGB-None 0.12662030114255796
R XGB-None -0.027756017876575795
F05 XGB-None 0.050389836322384994

P LGBM-5050 0.22554414138605264
R LGBM-5050 0.011195940361185555
F2 LGBM-5050 0.09124052850247608


# RQ3. Feature reduction

In [92]:
# Load the RQ3 score table (the path is relative to the working directory).
# NOTE: this rebinds `df` as well as the xgb_none_* and lgbm_5050_* lists that
# earlier cells filled from other spreadsheets.
df = pd.read_excel('RQ3_Statistics.xlsx')

# Baseline score lists.
xgb_none_p, xgb_none_r, xgb_none_f05 = (
    df[column].values.tolist()
    for column in ('XGB-None-P', 'XGB-None-R', 'XGB-None-F0.5')
)
lgbm_5050_p, lgbm_5050_r, lgbm_5050_f2 = (
    df[column].values.tolist()
    for column in ('LGBM-5050-P', 'LGBM-5050-R', 'LGBM-5050-F2')
)

# 40 / 50 / 60 variants of each model. These are presumably the reduced
# feature sets that the section title refers to; what the numbers stand for
# is not visible here -- TODO confirm.
xgb40_none_p, xgb40_none_r, xgb40_none_f05 = (
    df[column].values.tolist()
    for column in ('40XG-Prec', '40XG-Rec', '40XG-F05')
)
xgb50_none_p, xgb50_none_r, xgb50_none_f05 = (
    df[column].values.tolist()
    for column in ('50XG-Prec', '50XG-Rec', '50XG-F05')
)
xgb60_none_p, xgb60_none_r, xgb60_none_f05 = (
    df[column].values.tolist()
    for column in ('60XG-Prec', '60XG-Rec', '60XG-F05')
)
lgbm40_5050_p, lgbm40_5050_r, lgbm40_5050_f2 = (
    df[column].values.tolist()
    for column in ('40LG-Prec', '40LG-Rec', '40LG-F2')
)
lgbm50_5050_p, lgbm50_5050_r, lgbm50_5050_f2 = (
    df[column].values.tolist()
    for column in ('50LG-Prec', '50LG-Rec', '50LG-F2')
)
lgbm60_5050_p, lgbm60_5050_r, lgbm60_5050_f2 = (
    df[column].values.tolist()
    for column in ('60LG-Prec', '60LG-Rec', '60LG-F2')
)

print('=== RQ3. Wilcoxon test \n')

# Exact Wilcoxon signed-rank test of every baseline list against its 40/50/60
# variant. A label starting with a newline opens a new group in the output.
for label, baseline, variant in (
    ('XGB-None-P vs 40', xgb_none_p, xgb40_none_p),
    ('XGB-None-R vs 40', xgb_none_r, xgb40_none_r),
    ('XGB-None-F05 vs 40', xgb_none_f05, xgb40_none_f05),
    ('\nXGB-None-P vs 50', xgb_none_p, xgb50_none_p),
    ('XGB-None-R vs 50', xgb_none_r, xgb50_none_r),
    ('XGB-None-F05 vs 50', xgb_none_f05, xgb50_none_f05),
    ('\nXGB-None-P vs 60', xgb_none_p, xgb60_none_p),
    ('XGB-None-R vs 60', xgb_none_r, xgb60_none_r),
    ('XGB-None-F05 vs 60', xgb_none_f05, xgb60_none_f05),
    ('\nLGBM-5050-P vs 40', lgbm_5050_p, lgbm40_5050_p),
    ('LGBM-5050-R vs 40', lgbm_5050_r, lgbm40_5050_r),
    ('LGBM-5050-F2 vs 40', lgbm_5050_f2, lgbm40_5050_f2),
    ('\nLGBM-5050-P vs 50', lgbm_5050_p, lgbm50_5050_p),
    ('LGBM-5050-R vs 50', lgbm_5050_r, lgbm50_5050_r),
    ('LGBM-5050-F2 vs 50', lgbm_5050_f2, lgbm50_5050_f2),
    ('\nLGBM-5050-P vs 60', lgbm_5050_p, lgbm60_5050_p),
    ('LGBM-5050-R vs 60', lgbm_5050_r, lgbm60_5050_r),
    ('LGBM-5050-F2 vs 60', lgbm_5050_f2, lgbm60_5050_f2),
):
    print(label, stats.wilcoxon(baseline, variant, method='exact'))

=== RQ3. Wilcoxon test 

XGB-None-P vs 40 WilcoxonResult(statistic=26.0, pvalue=0.921875)
XGB-None-R vs 40 WilcoxonResult(statistic=6.0, pvalue=0.02734375)
XGB-None-F05 vs 40 WilcoxonResult(statistic=12.0, pvalue=0.130859375)

XGB-None-P vs 50 WilcoxonResult(statistic=24.0, pvalue=0.76953125)
XGB-None-R vs 50 WilcoxonResult(statistic=1.0, pvalue=0.00390625)
XGB-None-F05 vs 50 WilcoxonResult(statistic=12.0, pvalue=0.130859375)

XGB-None-P vs 60 WilcoxonResult(statistic=27.0, pvalue=1.0)
XGB-None-R vs 60 WilcoxonResult(statistic=7.0, pvalue=0.037109375)
XGB-None-F05 vs 60 WilcoxonResult(statistic=11.0, pvalue=0.10546875)

LGBM-5050-P vs 40 WilcoxonResult(statistic=0.0, pvalue=0.001953125)
LGBM-5050-R vs 40 WilcoxonResult(statistic=8.0, pvalue=0.048828125)
LGBM-5050-F2 vs 40 WilcoxonResult(statistic=26.0, pvalue=0.921875)

LGBM-5050-P vs 50 WilcoxonResult(statistic=1.0, pvalue=0.00390625)
LGBM-5050-R vs 50 WilcoxonResult(statistic=2.0, pvalue=0.005859375)
LGBM-5050-F2 vs 50 WilcoxonResult

In [93]:
print('=== RQ3. Effect size \n')

# Hedges' g of every baseline list against its 40/50/60 variant, in the same
# order as the Wilcoxon tests. A label starting with a newline opens a new
# group in the printed output.
for label, baseline, variant in (
    ('XGB-None-P vs 40', xgb_none_p, xgb40_none_p),
    ('XGB-None-R vs 40', xgb_none_r, xgb40_none_r),
    ('XGB-None-F05 vs 40', xgb_none_f05, xgb40_none_f05),
    ('\nXGB-None-P vs 50', xgb_none_p, xgb50_none_p),
    ('XGB-None-R vs 50', xgb_none_r, xgb50_none_r),
    ('XGB-None-F05 vs 50', xgb_none_f05, xgb50_none_f05),
    ('\nXGB-None-P vs 60', xgb_none_p, xgb60_none_p),
    ('XGB-None-R vs 60', xgb_none_r, xgb60_none_r),
    ('XGB-None-F05 vs 60', xgb_none_f05, xgb60_none_f05),
    ('\nLGBM-5050-P vs 40', lgbm_5050_p, lgbm40_5050_p),
    ('LGBM-5050-R vs 40', lgbm_5050_r, lgbm40_5050_r),
    ('LGBM-5050-F2 vs 40', lgbm_5050_f2, lgbm40_5050_f2),
    ('\nLGBM-5050-P vs 50', lgbm_5050_p, lgbm50_5050_p),
    ('LGBM-5050-R vs 50', lgbm_5050_r, lgbm50_5050_r),
    ('LGBM-5050-F2 vs 50', lgbm_5050_f2, lgbm50_5050_f2),
    ('\nLGBM-5050-P vs 60', lgbm_5050_p, lgbm60_5050_p),
    ('LGBM-5050-R vs 60', lgbm_5050_r, lgbm60_5050_r),
    ('LGBM-5050-F2 vs 60', lgbm_5050_f2, lgbm60_5050_f2),
):
    print(label, pg.compute_effsize(baseline, variant, eftype='hedges'))

=== RQ3. Effect size 

XGB-None-P vs 40 -0.003051415428147418
XGB-None-R vs 40 -0.32071152391511687
XGB-None-F05 vs 40 -0.15847788547396138

XGB-None-P vs 50 0.01688224330930103
XGB-None-R vs 50 -0.33949001155942665
XGB-None-F05 vs 50 -0.1490698740294031

XGB-None-P vs 60 0.0447171668094513
XGB-None-R vs 60 -0.3519271955045422
XGB-None-F05 vs 60 -0.14847048543547137

LGBM-5050-P vs 40 1.0579752002028366
LGBM-5050-R vs 40 -0.56976227671576
LGBM-5050-F2 vs 40 0.05535899848823149

LGBM-5050-P vs 50 0.7195774503912163
LGBM-5050-R vs 50 -0.6408721018281966
LGBM-5050-F2 vs 50 -0.11033883053605074

LGBM-5050-P vs 60 0.647005034466351
LGBM-5050-R vs 60 -0.5888660878576107
LGBM-5050-F2 vs 60 -0.1053732023181261
