In [1]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel

In [2]:
data = pd.read_csv('data/CA_All.csv')
print(data.head())
RF_DFT_50 = data['RF_DFT_50']
XGB_MF_pca_700 = data['XGB_MF_pca_700']
XGB_DFT_750 = data['XGB_DFT_750']
TrAB_DFT_All = data['TrAB_DFT_All']

   Run  RF_DFT_50  XGB_MF_pca_700  XGB_DFT_750  TrAB_DFT_All
0    0   0.068619        0.196809     0.343602      0.321043
1    1  -0.188152        0.456268     0.477798      0.740186
2    2   0.459085        0.461223     0.616818      0.676513
3    3   0.340911        0.457019     0.736468      0.768791
4    4  -0.010733        0.297803     0.366012      0.452751


In [3]:
print('TrAB/DFT (Table S15) & RF/DFT (Table S9), in CA')

t_stat, p_value = ttest_rel(TrAB_DFT_All, RF_DFT_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S15) & RF/DFT (Table S9), in CA
t-statistic: 24.596629507689205
p-value: 5.814427707435613e-44
There is a significant difference (reject the null hypothesis).


In [4]:
print('TrAB/DFT (Table S15) & XGB/MF_pca trained on 700 data points (Table S11), in CA')

t_stat, p_value = ttest_rel(TrAB_DFT_All, XGB_MF_pca_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S15) & XGB/MF_pca trained on 700 data points (Table S11), in CA
t-statistic: 31.89723504474428
p-value: 6.902165384609942e-54
There is a significant difference (reject the null hypothesis).


In [5]:
print('TrAB/DFT (Table S15) & XGB/DFT trained on 750 data points (Table S11), in CA')

t_stat, p_value = ttest_rel(TrAB_DFT_All, XGB_DFT_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S15) & XGB/DFT trained on 750 data points (Table S11), in CA
t-statistic: 12.70859873565434
p-value: 1.603248922145943e-22
There is a significant difference (reject the null hypothesis).


In [6]:
data = pd.read_csv('data/CA_S1.csv')
print(data.head())
RF_DFT_50 = data['RF_DFT_50']
TrAB_DFT_All = data['TrAB_DFT_All']
TrAB_DFT_S1 = data['TrAB_DFT_S1']
TrAB_DFT_FE_S1 = data['TrAB_DFT_FE_S1']

   Run  RF_DFT_50  TrAB_DFT_All  TrAB_DFT_S1  TrAB_DFT_FE_S1
0    0   0.068619      0.321043     0.563848        0.603082
1    1  -0.188152      0.740186     0.789190        0.783156
2    2   0.459085      0.676513     0.616334        0.599995
3    3   0.340911      0.768791     0.830992        0.876952
4    4  -0.010733      0.452751     0.437378        0.518926


In [7]:
print('TrAB/DFT (Source: All; Table S15) & TrAB/DFT (Source: S1;Table S17), in CA')

t_stat, p_value = ttest_rel(TrAB_DFT_All, TrAB_DFT_S1)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Source: All; Table S15) & TrAB/DFT (Source: S1;Table S17), in CA
t-statistic: -4.379950156315912
p-value: 2.9525851178622976e-05
There is a significant difference (reject the null hypothesis).


In [8]:
print('TrAB/DFT (Source: S1; Table S17) & TrAB/DFT_FE (Source: S1;Table S7), in CA')

t_stat, p_value = ttest_rel(TrAB_DFT_S1, TrAB_DFT_FE_S1)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Source: S1; Table S17) & TrAB/DFT_FE (Source: S1;Table S7), in CA
t-statistic: -11.742751908178393
p-value: 1.8246241856250946e-20
There is a significant difference (reject the null hypothesis).


In [9]:
print('TrAB/DFT_FE (Source: S1;Table S17) & RF/DFT (Table S9), in CA')

t_stat, p_value = ttest_rel(RF_DFT_50, TrAB_DFT_FE_S1)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT_FE (Source: S1;Table S17) & RF/DFT (Table S9), in CA
t-statistic: -29.75207234539871
p-value: 3.5660977583907795e-51
There is a significant difference (reject the null hypothesis).


In [10]:
data = pd.read_csv('data/CO_a.csv')
print(data.head())
RF_50 = data['RF_50']
XGB_700 = data['XGB_700']
XGB_750 = data['XGB_750']
TrAB = data['TrAB']

   Run     RF_50   XGB_700   XGB_750      TrAB
0    0  0.533950  0.678687  0.743479  0.804822
1    1  0.016345  0.344799  0.547325  0.657055
2    2  0.502137  0.670859  0.813493  0.823318
3    3  0.327812  0.626690  0.849799  0.782982
4    4  0.304776  0.696276  0.770047  0.815074


In [11]:
print('TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_a')

t_stat, p_value = ttest_rel(TrAB, RF_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_a
t-statistic: 23.201181364658115
p-value: 8.0908016059637e-42
There is a significant difference (reject the null hypothesis).


In [12]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_a')

t_stat, p_value = ttest_rel(TrAB, XGB_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_a
t-statistic: 21.022187700756078
p-value: 2.749958307527428e-38
There is a significant difference (reject the null hypothesis).


In [13]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_a')

t_stat, p_value = ttest_rel(TrAB, XGB_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_a
t-statistic: 6.14300924498314
p-value: 1.6966727724075942e-08
There is a significant difference (reject the null hypothesis).


In [14]:
data = pd.read_csv('data/CO_b.csv')
print(data.head())
RF_50 = data['RF_50']
XGB_700 = data['XGB_700']
XGB_750 = data['XGB_750']
TrAB = data['TrAB']

   Run     RF_50   XGB_700   XGB_750      TrAB
0    0  0.576839  0.249742  0.655421  0.734779
1    1  0.088171  0.327235  0.679869  0.680665
2    2  0.626713  0.436153  0.805571  0.732674
3    3  0.707545  0.299933  0.701446  0.778006
4    4  0.649332  0.352372  0.715339  0.785882


In [15]:
print('TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_b')

t_stat, p_value = ttest_rel(TrAB, RF_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_b
t-statistic: 21.024573228666718
p-value: 2.724779955623454e-38
There is a significant difference (reject the null hypothesis).


In [16]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_b')

t_stat, p_value = ttest_rel(TrAB, XGB_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_b
t-statistic: 45.5190705994218
p-value: 3.4070402728396378e-68
There is a significant difference (reject the null hypothesis).


In [17]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_b')

t_stat, p_value = ttest_rel(TrAB, XGB_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_b
t-statistic: 7.402092928104163
p-value: 4.47244835576941e-11
There is a significant difference (reject the null hypothesis).


In [18]:
data = pd.read_csv('data/CO_c.csv')
print(data.head())
RF_50 = data['RF_50']
XGB_700 = data['XGB_700']
XGB_750 = data['XGB_750']
TrAB = data['TrAB']

   Run     RF_50   XGB_700   XGB_750      TrAB
0    0  0.504917  0.687809  0.868085  0.827294
1    1  0.401991  0.591923  0.694071  0.824475
2    2  0.527857  0.721575  0.830696  0.877277
3    3  0.515143  0.651817  0.763414  0.834685
4    4  0.575620  0.678203  0.792759  0.841163


In [19]:
print('TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_c')

t_stat, p_value = ttest_rel(TrAB, RF_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_c
t-statistic: 30.30922949070567
p-value: 6.801741509547473e-52
There is a significant difference (reject the null hypothesis).


In [20]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_c')

t_stat, p_value = ttest_rel(TrAB, XGB_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_c
t-statistic: 34.7776313003039
p-value: 2.6525898769856815e-57
There is a significant difference (reject the null hypothesis).


In [21]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_c')

t_stat, p_value = ttest_rel(TrAB, XGB_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_c
t-statistic: 12.36154845615955
p-value: 8.704724881683929e-22
There is a significant difference (reject the null hypothesis).


In [22]:
data = pd.read_csv('data/CO_d.csv')
print(data.head())
RF_50 = data['RF_50']
XGB_700 = data['XGB_700']
XGB_750 = data['XGB_750']
TrAB = data['TrAB']

   Run     RF_50   XGB_700   XGB_750      TrAB
0    0  0.440022  0.320816  0.579851  0.852969
1    1  0.215179 -0.053118  0.292704  0.724592
2    2  0.423395 -0.068780  0.256490  0.606272
3    3  0.426849  0.107521  0.420789  0.706969
4    4  0.586205 -0.012034  0.352323  0.713905


In [23]:
print('TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_d')

t_stat, p_value = ttest_rel(TrAB, RF_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_d
t-statistic: 24.88145394234157
p-value: 2.1763985098353838e-44
There is a significant difference (reject the null hypothesis).


In [24]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_d')

t_stat, p_value = ttest_rel(TrAB, XGB_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_d
t-statistic: 33.875424312574935
p-value: 2.931120012606908e-56
There is a significant difference (reject the null hypothesis).


In [25]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_d')

t_stat, p_value = ttest_rel(TrAB, XGB_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_d
t-statistic: 25.903111001131617
p-value: 6.8507060126866355e-46
There is a significant difference (reject the null hypothesis).


In [26]:
data = pd.read_csv('data/CO_e.csv')
print(data.head())
RF_50 = data['RF_50']
XGB_700 = data['XGB_700']
XGB_750 = data['XGB_750']
TrAB = data['TrAB']

   Run     RF_50   XGB_700   XGB_750      TrAB
0    0  0.437617  0.656323  0.746912  0.816320
1    1 -0.525392  0.650988  0.399642  0.835107
2    2  0.488364  0.626138  0.756298  0.861815
3    3  0.394589  0.729171  0.693801  0.812223
4    4  0.424264  0.597386  0.718320  0.768265


In [27]:
print('TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_e')

t_stat, p_value = ttest_rel(TrAB, RF_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & RF/DFT (Table S24), in CO_e
t-statistic: 26.26678959638245
p-value: 2.0496371362430134e-46
There is a significant difference (reject the null hypothesis).


In [28]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_e')

t_stat, p_value = ttest_rel(TrAB, XGB_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CO_e
t-statistic: 27.234191045479836
p-value: 8.786999368776139e-48
There is a significant difference (reject the null hypothesis).


In [29]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_e')

t_stat, p_value = ttest_rel(TrAB, XGB_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CO_e
t-statistic: 15.726836062911191
p-value: 1.1308343917588555e-28
There is a significant difference (reject the null hypothesis).


In [30]:
data = pd.read_csv('data/CS.csv')
print(data.head())
RF_50 = data['RF_50']
XGB_700 = data['XGB_700']
XGB_750 = data['XGB_750']
TrAB = data['TrAB']

   Run     RF_50   XGB_700   XGB_750      TrAB
0    0  0.256797  0.378712  0.341837  0.463755
1    1 -0.501520  0.404286  0.344600  0.448208
2    2  0.227116  0.405029  0.252145  0.511192
3    3  0.137238  0.519772  0.451620  0.544872
4    4  0.240468  0.380697  0.493451  0.518015


In [31]:
print('TrAB/DFT (Table S24) & RF/DFT (Table S24), in CS')

t_stat, p_value = ttest_rel(TrAB, RF_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & RF/DFT (Table S24), in CS
t-statistic: 20.55633987149075
p-value: 1.678741985491224e-37
There is a significant difference (reject the null hypothesis).


In [32]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CS')

t_stat, p_value = ttest_rel(TrAB, XGB_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CS
t-statistic: 2.4555975999459663
p-value: 0.01580795700067162
There is a significant difference (reject the null hypothesis).


In [33]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CS')

t_stat, p_value = ttest_rel(TrAB, XGB_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CS
t-statistic: 9.630029762339744
p-value: 7.018105792054031e-16
There is a significant difference (reject the null hypothesis).


In [34]:
data = pd.read_csv('data/CN.csv')
print(data.head())
RF_50 = data['RF_50']
XGB_700 = data['XGB_700']
XGB_750 = data['XGB_750']
TrAB = data['TrAB']

   Run     RF_50   XGB_700   XGB_750      TrAB
0    0  0.183984  0.430460  0.502115  0.513170
1    1  0.166871  0.344189  0.433256  0.634300
2    2  0.413852  0.441665  0.440000  0.627432
3    3  0.274727  0.412746  0.469931  0.671360
4    4 -0.090785  0.565013  0.499040  0.721493


In [35]:
print('TrAB/DFT (Table S24) & RF/DFT (Table S24), in CN')

t_stat, p_value = ttest_rel(TrAB, RF_50)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & RF/DFT (Table S24), in CN
t-statistic: 17.916810570029753
p-value: 7.808408140934299e-33
There is a significant difference (reject the null hypothesis).


In [36]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CN')

t_stat, p_value = ttest_rel(TrAB, XGB_700)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 700 data points (Table S24), in CN
t-statistic: 23.51095726133518
p-value: 2.6574581774812646e-42
There is a significant difference (reject the null hypothesis).


In [37]:
print('TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CN')

t_stat, p_value = ttest_rel(TrAB, XGB_750)
print(f"t-statistic: {t_stat}")
print(f"p-value: {p_value}")

if p_value < 0.05:
    print("There is a significant difference (reject the null hypothesis).")
else:
    print("There is no significant difference (fail to reject the null hypothesis).")

TrAB/DFT (Table S24) & XGB/DFT trained on 750 data points (Table S24), in CN
t-statistic: 18.067163051105403
p-value: 4.136060736025307e-33
There is a significant difference (reject the null hypothesis).
