# Statistical Significance

In [1]:
from scipy.stats import ttest_ind as ttest

In [2]:
def t_test(a, b, verbose=True, alpha=0.05):
    """Run a two-sample t-test on `a` and `b` and return the p-value.

    The test itself is delegated to `ttest`, this notebook's alias for
    `scipy.stats.ttest_ind`, called with its default options.

    Parameters
    ----------
    a, b : sequences of numbers
        The two samples to compare; passed to `ttest` unchanged.
    verbose : bool, default True
        When true, print the p-value and whether it is below `alpha`.
    alpha : float, default 0.05
        Significance threshold. It only affects the printed comparison,
        never the returned value.

    Returns
    -------
    The p-value, i.e. the second element of the `ttest` result.

    NOTE(review): ttest_ind treats the two samples as independent. If the
    values in `a` and `b` are matched (e.g. same folds), a paired test may be
    more appropriate -- confirm with the experiment design.
    """
    # Index 1 of the result is the p-value; the t statistic (index 0) is unused.
    p = ttest(a, b)[1]
    if verbose:
        print('p value', p)
        # With the default alpha this label reads exactly 'p<0.05'.
        print('p<{}'.format(alpha), p < alpha)
    return p

## Yuan style validation
CNN_208 used False False, i.e. train set unfiltered, valid set unfiltered. This is the normal way to evaluate, using all the data.

CNN_209 used True True i.e. exclude RCI -2 to 0 from train set and valid set. This follows the data preparation in Yuan et al.

CNN_210 used True False, i.e. train on tails but validate on all.

CNN_211 used False True, i.e. train on all but validate on tails.

In [3]:
# Accuracy scores recorded for CNN_208 .. CNN_211 (ten values per model).
accuracy_208 = [74.41037735849056, 78.63046044864227, 79.10271546635182, 76.50531286894923, 79.92916174734357, 77.59433962264151, 78.39433293978747, 70.95631641086186, 78.04014167650531, 76.03305785123968]
accuracy_209 = [83.28402366863905, 87.08272859216255, 85.58823529411764, 85.93974175035868, 85.5072463768116, 81.85840707964603, 82.08516886930984, 80.84795321637426, 85.75624082232012, 84.7457627118644]
accuracy_210 = [79.24528301886792, 80.28335301062573, 86.06847697756788, 72.01889020070838, 74.49822904368358, 79.83490566037736, 79.456906729634, 79.92916174734357, 79.81109799291617, 80.6375442739079]
accuracy_211 = [82.36130867709815, 82.25806451612904, 83.85714285714285, 79.01785714285714, 86.37037037037038, 82.40469208211144, 81.30434782608695, 75.94752186588921, 77.43813682678311, 84.71615720524017]
# Test CNN_208 against each other model in turn (209, 210, 211).
for _scores in (accuracy_209, accuracy_210, accuracy_211):
    t = t_test(accuracy_208, _scores)  # note: t_test returns the p-value

p value 2.157612428416546e-06
p<0.05 True
p value 0.14303036136871453
p<0.05 False
p value 0.002730462293606183
p<0.05 True


In [4]:
# Precision scores recorded for CNN_208 .. CNN_211 (ten values per model).
precision_208 = [86.77918424753868, 85.73281452658884, 86.01583113456465, 85.61827956989248, 85.33163265306123, 85.90425531914893, 85.19003931847968, 86.71641791044776, 85.78947368421052, 86.54646324549236]
precision_209 = [91.9732441471572, 91.23076923076923, 93.79084967320262, 90.01512859304086, 88.43843843843844, 88.9967637540453, 89.93506493506493, 92.16354344122658, 91.07981220657277, 91.13149847094802]
precision_210 = [85.0383631713555, 85.76923076923076, 90.8745247148289, 83.810888252149, 86.44793152639087, 86.9281045751634, 85.99221789883269, 86.39896373056995, 86.26943005181347, 85.45918367346938]
precision_211 = [92.25806451612904, 90.84967320261438, 90.12345679012346, 88.75638841567292, 91.31121642969984, 90.39087947882736, 91.58415841584159, 89.92932862190813, 91.01899827288429, 89.75155279503106]
# Test CNN_208 against each other model in turn (209, 210, 211).
for _scores in (precision_209, precision_210, precision_211):
    t = t_test(precision_208, _scores)  # note: t_test returns the p-value

p value 2.992278997557578e-08
p<0.05 True
p value 0.5853262226048538
p<0.05 False
p value 2.1444448501122557e-10
p<0.05 True


In [5]:
# Recall scores recorded for CNN_208 .. CNN_211 (ten values per model).
recall_208 = [83.37837837837839, 90.30054644808743, 90.17980636237898, 87.37997256515774, 92.40331491712708, 88.4931506849315, 90.27777777777779, 78.72628726287263, 89.31506849315069, 85.47945205479452]
recall_209 = [89.43089430894308, 94.88, 90.53627760252367, 94.896331738437, 96.24183006535948, 90.9090909090909, 90.22801302931596, 86.4217252396166, 93.56913183279742, 92.26006191950464]
recall_210 = [91.85082872928176, 92.27586206896552, 93.97116644823068, 82.51057827926658, 83.35625859697386, 90.35326086956522, 90.94650205761316, 91.12021857923497, 91.10807113543092, 93.05555555555556]
recall_211 = [88.27160493827161, 89.5330112721417, 92.25908372827804, 87.41610738255034, 93.98373983739837, 90.09740259740259, 87.67772511848341, 82.49594813614263, 83.65079365079366, 93.67909238249594]
# Test CNN_208 against each other model in turn (209, 210, 211).
for _scores in (recall_209, recall_210, recall_211):
    t = t_test(recall_208, _scores)  # note: t_test returns the p-value

p value 0.014192027936472677
p<0.05 True
p value 0.18457856022512606
p<0.05 False
p value 0.4689392079337885
p<0.05 False


In [6]:
# AUROC scores recorded for CNN_208 .. CNN_211 (ten values per model).
AUROC_208 = [48.06994494494495, 52.78569731527679, 51.46734038281355, 48.19348538745902, 50.857925706328885, 55.24959368469933, 48.38965441819772, 47.08796399890605, 52.59454396440698, 52.47746165554384]
AUROC_209 = [62.00986272157804, 53.49625, 53.15800301741873, 55.83048530416952, 53.866054968996146, 50.947582927657656, 51.11332587874957, 53.79530681943373, 57.27560085018257, 47.73294716868072]
AUROC_210 = [52.810327927285684, 52.95534200113058, 56.21606440741434, 53.6635596165246, 54.54034846400732, 54.21620244565217, 47.908674525121484, 52.30220955096222, 50.528916458323515, 54.559820647419066]
AUROC_211 = [54.21436588103254, 49.57366489796996, 49.32918346655348, 53.838087248322154, 50.40921409214092, 57.42571822117277, 44.93916465729886, 49.17083597585324, 47.60790866054025, 54.156054642278306]
# Test CNN_208 against each other model in turn (209, 210, 211).
for _scores in (AUROC_209, AUROC_210, AUROC_211):
    t = t_test(AUROC_208, _scores)  # note: t_test returns the p-value

p value 0.04544549542068187
p<0.05 True
p value 0.060223977274324055
p<0.05 False
p value 0.813027198851068
p<0.05 False


In [7]:
# AUPRC scores recorded for CNN_208 .. CNN_211 (ten values per model).
AUPRC_208 = [87.27364707246679, 88.51296558040718, 85.8585503768368, 86.35791493584391, 86.29830119423622, 88.37802603763362, 83.67545689365356, 86.66779879994827, 88.59229432214448, 87.67008997461666]
AUPRC_209 = [94.4704625279351, 91.76848473665817, 94.15796470351493, 91.49942288014161, 90.86906716920757, 90.00509541025998, 90.69977722644545, 92.40811719560473, 93.70082240089923, 91.56181864730812]
AUPRC_210 = [87.05428012501757, 87.36347301792618, 92.28768648855467, 85.05995267512372, 87.42736262384689, 89.04801670863911, 84.98665673483448, 87.25878882127739, 86.89498551046803, 87.08328957810218]
AUPRC_211 = [92.24587532879879, 91.55434434246726, 91.64252597566346, 90.46847333727929, 91.73009904411104, 92.56618075022382, 90.50367253489415, 89.8167392016381, 91.90172278761963, 91.34234222960794]
# Test CNN_208 against each other model in turn (209, 210, 211).
for _scores in (AUPRC_209, AUPRC_210, AUPRC_211):
    t = t_test(AUPRC_208, _scores)  # note: t_test returns the p-value

p value 4.883659056246111e-07
p<0.05 True
p value 0.530242953758573
p<0.05 False
p value 2.114611764596128e-07
p<0.05 True


In [8]:
# F1 scores recorded for CNN_208 .. CNN_211 (ten values per model).
F1_208 = [85.0447966919366, 87.95741849634065, 88.04861580013504, 86.49015614392397, 88.72679045092838, 87.17948717948718, 87.66014834794336, 82.52840909090908, 87.51677852348995, 86.00964851826326]
F1_209 = [90.68425391591097, 93.01960784313727, 92.13483146067416, 92.3913043478261, 92.17527386541472, 89.94276369582992, 90.08130081300813, 89.20032976092332, 92.3076923076923, 91.6923076923077]
F1_210 = [88.31341301460823, 88.90365448504983, 92.39690721649485, 83.15565031982942, 84.87394957983193, 88.60759493670885, 88.4, 88.69680851063829, 88.62275449101796, 89.09574468085107]
F1_211 = [90.22082018927445, 90.18653690186537, 91.17876658860266, 88.08114961961115, 92.62820512820514, 90.2439024390244, 89.58837772397095, 86.0524091293322, 87.1794871794872, 91.67327517842982]
# Test CNN_208 against each other model in turn (209, 210, 211).
for _scores in (F1_209, F1_210, F1_211):
    t = t_test(F1_208, _scores)  # note: t_test returns the p-value

p value 3.491984868681966e-06
p<0.05 True
p value 0.1719251859178094
p<0.05 False
p value 0.002900467833867512
p<0.05 True


In [9]:
# MCC scores recorded for CNN_208 .. CNN_211 (ten values per model).
MCC_208 = [-0.0331401558702634, -0.0641440458653698, 0.0541342966461918, -0.034942139068395835, -0.014671100610550714, -0.014608496086936427, 0.015544506567229872, -0.024091090703981476, -0.03401261836141422, 0.024952865301754203]
MCC_209 = [0.09633956419839877, 0.07307644399303148, 0.06636423882362208, 0.008292777696914671, -0.04279093032779984, -0.02446392938463269, -0.02340749740790471, 0.0567871609406926, -0.035560505191166, -0.013719928364518711]
MCC_210 = [-0.03302484872292993, 0.016811737840806862, 0.09771594246326845, 0.006076828336694536, 0.038680681394880574, 0.012163680100928963, -0.0070144009957176364, -0.0022201788217067905, -0.0032824707973748988, 0.04478886744992383]
MCC_211 = [0.008312779472127918, -0.021348285519107633, -0.03661066849713179, 0.0054690879629985525, 0.027297723561564166, 0.0069416876373039355, -0.01512027771366709, -0.0008926377952096025, -0.057420858269688346, -0.007575870246831303]
# Test CNN_208 against each other model in turn (209, 210, 211).
for _scores in (MCC_209, MCC_210, MCC_211):
    t = t_test(MCC_208, _scores)  # note: t_test returns the p-value

p value 0.16428486540061932
p<0.05 False
p value 0.07839691568740885
p<0.05 False
p value 0.8031020120191505
p<0.05 False
