In [1]:
import numpy as np
import tensorflow as tf

from tensorflow import keras
from scipy.io import loadmat, savemat

from tensorflow.keras import layers

from sklearn.model_selection import KFold
from sklearn.metrics import label_ranking_average_precision_score as avgprec, coverage_error, label_ranking_loss

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
#     logical_gpus = tf.config.experimental.list_logical_devices('GPU')
#     print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

In [48]:
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_', 
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res = []
for m in range(2):
    for n in range(2):
        print(modelname[m*2 + n])
        ap_list = []
        rl_list = []
        ce_list = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']
            ap_list.append(avgprec(test_y, pred_y))
            rl_list.append(label_ranking_loss(test_y, pred_y))
            ce_list.append(coverage_error(test_y, pred_y) - 1)
        print('ap_list: ' + ' '.join(['{:.4f}'.format(x) for x in ap_list]))
        print('rl_list: ' + ' '.join(['{:.4f}'.format(x) for x in rl_list]))
        print('ce_list: ' + ' '.join(['{:.4f}'.format(x) for x in ce_list]))

BLSTM
ap_list: 0.7013 0.6977 0.6847 0.6846 0.6797
rl_list: 0.0753 0.0774 0.0781 0.0820 0.0902
ce_list: 3.6082 3.7179 3.7107 3.8931 4.2766
BLSTM ConvNet1
ap_list: 0.7550 0.7475 0.7499 0.7559 0.7463
rl_list: 0.0555 0.0594 0.0627 0.0593 0.0607
ce_list: 2.7276 2.8560 3.0325 2.8659 2.9465
ConvNet2
ap_list: 0.6181 0.6192 0.6196 0.6161 0.6228
rl_list: 0.0694 0.0678 0.0691 0.0673 0.0678
ce_list: 3.3299 3.3123 3.3281 3.2559 3.3095
BLSTM ConvNet1 ConvNet2
ap_list: 0.7377 0.7406 0.7416 0.7428 0.7445
rl_list: 0.0641 0.0625 0.0649 0.0609 0.0662
ce_list: 3.0970 2.9896 3.1361 2.9197 3.1217


In [49]:
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_', 
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res = []
for m in range(2):
    for n in range(2):
        print(modelname[m*2 + n])
        ap_list = []
        rl_list = []
        ce_list = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']
            ap_list.append(avgprec(test_y, pred_y))
            rl_list.append(label_ranking_loss(test_y, pred_y))
            ce_list.append(coverage_error(test_y, pred_y) - 1)
        print('ap_list: ' + ' '.join(['{:.4f}'.format(x) for x in ap_list]))
        print('rl_list: ' + ' '.join(['{:.4f}'.format(x) for x in rl_list]))
        print('ce_list: ' + ' '.join(['{:.4f}'.format(x) for x in ce_list]))

BLSTM
ap_list: 0.7476 0.7640 0.7504 0.7452 0.7546
rl_list: 0.0965 0.0916 0.0973 0.1053 0.0929
ce_list: 1.5741 1.5098 1.6049 1.7230 1.5359
BLSTM ConvNet1
ap_list: 0.7910 0.7844 0.7853 0.7867 0.7906
rl_list: 0.0780 0.0804 0.0769 0.0793 0.0747
ce_list: 1.3342 1.3532 1.2970 1.3088 1.2636
ConvNet2
ap_list: 0.6324 0.6536 0.6452 0.6449 0.6377
rl_list: 0.1351 0.1254 0.1292 0.1302 0.1328
ce_list: 2.0873 1.9539 2.0140 2.0139 2.0621
BLSTM ConvNet1 ConvNet2
ap_list: 0.7903 0.7932 0.7967 0.7853 0.7849
rl_list: 0.0750 0.0751 0.0707 0.0780 0.0802
ce_list: 1.2681 1.2824 1.2115 1.3124 1.3496


In [55]:
from sklearn.metrics import roc_curve, auc
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_', 
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
res = []
for m in range(2):
    for n in range(2):
        tres = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']

            resauc = []
            for i in range(14):
#                 if i in [18, 23, 28, 31]:
#                     continue
                fpr, tpr, _ = roc_curve(test_y[:, i], pred_y[:, i])
                roc_auc = auc(fpr, tpr)
                resauc.append(roc_auc)
                
            
            tres.append(resauc)
            # res.append(resauc)
        print(modelname[m*2+n])
        for t in tres:
            print(' '.join(['{:.4f}'.format(x) for x in t]))

BLSTM
0.8856 0.8284 0.8743 0.8085 0.8332 0.8992 0.8522 0.8955 0.8583 0.8628 0.8678 0.8203 0.8865 0.8358
0.9135 0.8501 0.9078 0.9018 0.8630 0.8977 0.8706 0.9242 0.8609 0.8882 0.8760 0.8315 0.8927 0.8997
0.9048 0.8426 0.8979 0.8301 0.8576 0.8869 0.8562 0.9013 0.8871 0.8894 0.8769 0.8480 0.8907 0.8614
0.8948 0.8411 0.8713 0.9008 0.8889 0.9057 0.8563 0.9035 0.8940 0.8860 0.8618 0.8383 0.8599 0.8339
0.8973 0.8448 0.8723 0.8557 0.8791 0.8977 0.8523 0.9052 0.9081 0.8763 0.8724 0.8428 0.8874 0.8842
BLSTM ConvNet1
0.9255 0.8794 0.9121 0.8854 0.8930 0.9081 0.8952 0.9203 0.9195 0.9228 0.8965 0.9134 0.9142 0.9039
0.9352 0.8834 0.9063 0.8708 0.8972 0.9047 0.8893 0.9381 0.8558 0.8931 0.8969 0.8647 0.9070 0.9379
0.9368 0.8828 0.9179 0.8630 0.8903 0.9225 0.8719 0.8992 0.9257 0.9196 0.8968 0.8438 0.9141 0.9327
0.9235 0.8779 0.9302 0.9230 0.8836 0.9127 0.9018 0.9192 0.9117 0.8917 0.8932 0.8864 0.9067 0.8998
0.9302 0.8802 0.9146 0.8582 0.8962 0.9140 0.9060 0.8877 0.9072 0.9000 0.8973 0.8560 0.9066 0.8696

In [57]:
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_', 
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res = []
for m in range(2):
    for n in range(2):
        tres = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']

            resauc = []
            for i in range(37):
                if i in [18, 23, 28, 31]:
                    continue
                fpr, tpr, _ = roc_curve(test_y[:, i], pred_y[:, i])
                roc_auc = auc(fpr, tpr)
                resauc.append(roc_auc)
                
            tres.append(resauc)
        print(modelname[m*2+n])
        for t in tres:
            print(' '.join(['{:.4f}'.format(x) for x in t]))

BLSTM
0.8351 0.8524 0.6345 0.8781 0.8327 0.8001 0.8580 0.7618 0.8580 0.7423 0.8193 0.8873 0.8454 0.8225 0.8187 0.8850 0.8861 0.8528 0.8408 0.9279 0.6156 0.7901 0.8704 0.8026 0.8643 0.8755 0.8183 0.8804 0.7682 0.6300 0.6297 0.7253 0.6589
0.7831 0.8238 0.7148 0.8746 0.8124 0.7604 0.8209 0.6527 0.7944 0.7767 0.7855 0.8997 0.8262 0.8602 0.8606 0.8423 0.6561 0.8686 0.8896 0.7812 0.7934 0.7769 0.8658 0.7846 0.8454 0.8809 0.8455 0.8684 0.5384 0.3808 0.7072 0.7306 0.8589
0.8368 0.8714 0.5040 0.8760 0.8126 0.7665 0.8285 0.6578 0.7615 0.7395 0.7582 0.8957 0.8175 0.7689 0.8170 0.8859 0.8241 0.7511 0.7856 0.8377 0.6978 0.6791 0.8485 0.8161 0.8463 0.8760 0.7269 0.8762 0.7336 0.6274 0.6672 0.7770 0.8219
0.7311 0.8581 0.8915 0.8838 0.8266 0.7646 0.8176 0.8830 0.7884 0.7670 0.7976 0.8807 0.8205 0.8363 0.6439 0.8415 0.8343 0.8299 0.8131 0.7683 0.6775 0.7447 0.8572 0.7338 0.8512 0.8703 0.7826 0.8618 0.8470 0.8290 0.6571 0.9092 0.8299
0.7971 0.8168 0.8668 0.8815 0.8214 0.7742 0.7998 0.7592 0.8018 0.7265 

In [46]:
ap_list

[0.701288576616281,
 0.6976541451223579,
 0.6846751393451417,
 0.6846242416996341,
 0.6797455851194014]

In [None]:
ap_list.append(avgprec(test_y, pred_y))
rl_list.append(label_ranking_loss(test_y, pred_y))
ce_list.append(coverage_error(test_y, pred_y) - 1)

In [5]:
tmp = loadmat('4802_result/result_4802_0_9.mat')

In [6]:
test_y = tmp['test_y']
pred_y = tmp['pred_y']

In [37]:
pred_y.shape

(11190, 37)

In [10]:
avgprec(test_y, pred_y)

0.701288576616281

In [58]:
for _ in range(10):
    t = []
    for i in range(20):
        r = np.random.choice(11190, 1000, replace=False)
        t.append(avgprec(test_y[r, :], pred_y[r, :]))
    print(' '.join('{:.4f}'.format(x) for x in t))

0.7228 0.7448 0.7462 0.7458 0.7424 0.7489 0.7394 0.7508 0.7412 0.7327 0.7453 0.7346 0.7413 0.7286 0.7536 0.7493 0.7466 0.7565 0.7492 0.7424
0.7552 0.7531 0.7505 0.7341 0.7487 0.7324 0.7219 0.7430 0.7486 0.7414 0.7313 0.7478 0.7514 0.7571 0.7390 0.7358 0.7448 0.7281 0.7459 0.7481
0.7400 0.7311 0.7405 0.7411 0.7272 0.7534 0.7521 0.7659 0.7541 0.7460 0.7590 0.7189 0.7398 0.7470 0.7533 0.7458 0.7413 0.7343 0.7390 0.7289
0.7441 0.7370 0.7563 0.7417 0.7602 0.7364 0.7584 0.7395 0.7369 0.7431 0.7380 0.7253 0.7509 0.7368 0.7412 0.7496 0.7449 0.7474 0.7510 0.7610
0.7436 0.7508 0.7486 0.7568 0.7383 0.7348 0.7527 0.7571 0.7344 0.7714 0.7405 0.7393 0.7454 0.7126 0.7370 0.7307 0.7417 0.7540 0.7264 0.7268
0.7508 0.7665 0.7417 0.7565 0.7398 0.7453 0.7483 0.7296 0.7431 0.7230 0.7378 0.7268 0.7484 0.7477 0.7268 0.7356 0.7478 0.7557 0.7332 0.7645
0.7182 0.7399 0.7370 0.7549 0.7551 0.7432 0.7439 0.7350 0.7518 0.7237 0.7469 0.7666 0.7398 0.7547 0.7558 0.7650 0.7465 0.7445 0.7426 0.7426
0.7529 0.7512 0.7435

In [62]:
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_', 
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
res = []
for m in range(2):
    for n in range(2):
        tres = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']
            t = []
            for i in range(20):
                r = np.random.choice(test_y.shape[0], 1000, replace=False)
                t.append(avgprec(test_y[r, :], pred_y[r, :]))
            tres.append(t)
        print(modelname[m*2+n])
        for t in tres:
            print(' '.join(['{:.4f}'.format(x) for x in t]))

BLSTM
0.7675 0.7363 0.7269 0.7621 0.7420 0.7303 0.7447 0.7450 0.7346 0.7469 0.7533 0.7197 0.7552 0.7522 0.7491 0.7333 0.7531 0.7354 0.7467 0.7511
0.7713 0.7728 0.7689 0.7591 0.7489 0.7722 0.7666 0.7716 0.7482 0.7608 0.7461 0.7544 0.7628 0.7517 0.7577 0.7659 0.7738 0.7708 0.7659 0.7612
0.7560 0.7435 0.7454 0.7557 0.7544 0.7714 0.7486 0.7527 0.7492 0.7335 0.7561 0.7338 0.7552 0.7618 0.7501 0.7542 0.7397 0.7402 0.7489 0.7514
0.7599 0.7309 0.7461 0.7449 0.7296 0.7391 0.7529 0.7452 0.7471 0.7437 0.7446 0.7365 0.7464 0.7325 0.7459 0.7307 0.7534 0.7336 0.7291 0.7237
0.7592 0.7494 0.7451 0.7565 0.7510 0.7432 0.7473 0.7668 0.7388 0.7334 0.7628 0.7576 0.7561 0.7545 0.7598 0.7617 0.7462 0.7465 0.7609 0.7527
BLSTM ConvNet1
0.7906 0.8035 0.7988 0.8149 0.7839 0.7944 0.8068 0.7955 0.7991 0.7819 0.7910 0.8051 0.7972 0.7916 0.7852 0.8047 0.8009 0.7872 0.7848 0.7928
0.7721 0.7824 0.7814 0.7724 0.7758 0.7915 0.7756 0.7871 0.7946 0.7802 0.7718 0.7772 0.7904 0.7866 0.7833 0.7846 0.7822 0.7715 0.8003 0.7713

In [64]:
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_', 
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
res = []
for m in range(2):
    for n in range(2):
        tres = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']
            t = []
            for i in range(20):
                r = np.random.choice(test_y.shape[0], 1000, replace=False)
                t.append(avgprec(test_y[r, :], pred_y[r, :]))
            tres.append(t)
        print(modelname[m*2+n])
        for t in tres:
            print(' '.join(['{:.4f}'.format(x) for x in t]))

BLSTM
0.7128 0.7125 0.6996 0.6951 0.7025 0.6897 0.7005 0.6928 0.6891 0.6877 0.7036 0.6904 0.6879 0.6850 0.6956 0.7090 0.7204 0.6953 0.6896 0.7061
0.7005 0.6971 0.6995 0.6848 0.7130 0.7152 0.6821 0.7185 0.7160 0.6983 0.6890 0.6942 0.6881 0.7111 0.7050 0.7000 0.6943 0.7073 0.7030 0.6858
0.6724 0.6796 0.6641 0.6925 0.6739 0.6817 0.6797 0.6792 0.6962 0.6909 0.6832 0.6910 0.6851 0.6867 0.6849 0.6846 0.6745 0.6974 0.6788 0.6888
0.6867 0.6673 0.6786 0.6986 0.6925 0.6967 0.6829 0.6956 0.6941 0.6770 0.6875 0.6841 0.6930 0.6567 0.6819 0.6738 0.6734 0.6931 0.6767 0.6735
0.6811 0.6770 0.6982 0.6826 0.6754 0.6802 0.6737 0.6722 0.6797 0.6532 0.6839 0.6765 0.6756 0.6798 0.6816 0.6894 0.6874 0.6824 0.6748 0.6776
BLSTM ConvNet1
0.7451 0.7408 0.7578 0.7650 0.7476 0.7435 0.7491 0.7531 0.7463 0.7587 0.7636 0.7697 0.7722 0.7670 0.7634 0.7428 0.7499 0.7532 0.7713 0.7642
0.7510 0.7403 0.7473 0.7293 0.7429 0.7665 0.7482 0.7448 0.7536 0.7442 0.7602 0.7486 0.7463 0.7394 0.7502 0.7492 0.7490 0.7670 0.7523 0.7532

In [60]:
test_y.shape

(7220, 14)

In [42]:
np.amin(t)

0.6745576023041384

In [65]:
## Import the packages
import numpy as np
from scipy import stats


## Define 2 random distributions
#Sample Size
N = 10
#Gaussian distributed data with mean = 2 and var = 1
a = np.random.randn(N) + 2
#Gaussian distributed data with with mean = 0 and var = 1
b = np.random.randn(N)


## Calculate the Standard Deviation
#Calculate the variance to get the standard deviation

#For unbiased max likelihood estimate we have to divide the var by N-1, and therefore the parameter ddof = 1
var_a = a.var(ddof=1)
var_b = b.var(ddof=1)

#std deviation
s = np.sqrt((var_a + var_b)/2)
s



## Calculate the t-statistics
t = (a.mean() - b.mean())/(s*np.sqrt(2/N))



## Compare with the critical t-value
#Degrees of freedom
df = 2*N - 2

#p-value after comparison with the t 
p = 1 - stats.t.cdf(t,df=df)


print("t = " + str(t))
print("p = " + str(2*p))
### You can see that after comparing the t statistic with the critical t value (computed internally) we get a good p value of 0.0005 and thus we reject the null hypothesis and thus it proves that the mean of the two distributions are different and statistically significant.


## Cross Checking with the internal scipy function
t2, p2 = stats.ttest_ind(a,b)
print("t = " + str(t2))
print("p = " + str(p2))

t = 5.269665015565286
p = 5.1985831387657555e-05
t = 5.269665015565285
p = 5.198583138764879e-05


In [68]:
a= [1,2,3,4,5]
b= [2,2,2,2,2]

t,p = stats.ttest_ind(a,b)

In [69]:
t

1.414213562373095

In [70]:
p

0.19501552810007572

In [74]:
stats.ttest_1samp(a,2)

Ttest_1sampResult(statistic=1.414213562373095, pvalue=0.23019964108049873)

In [72]:
t

0.23019964108049873

In [73]:
p

1.414213562373095

In [95]:
t, p = stats.ttest_1samp(a,1)

In [96]:
p

0.04742065558431962

In [97]:
t

2.82842712474619

In [100]:
for t in tres:
    print(np.average(t))
    print(stats.ttest_1samp(t,0.6916))

0.7392558215382585
Ttest_1sampResult(statistic=16.425577087730836, pvalue=1.1032537454556298e-12)
0.7401285413183635
Ttest_1sampResult(statistic=19.13977386991606, pvalue=7.071347429729854e-14)
0.7419374437613769
Ttest_1sampResult(statistic=23.987157121407098, pvalue=1.142218887410092e-15)
0.740900607128036
Ttest_1sampResult(statistic=20.970421738305454, pvalue=1.3435574169367129e-14)
0.7482548216821112
Ttest_1sampResult(statistic=25.04507846932193, pvalue=5.152557611326228e-16)


In [101]:
np.average(tres)

0.7420954470856294

In [116]:
stats.ttest_1samp([0.7377,0.7406,0.7416,0.7428,0.7445],0.7382)

Ttest_1sampResult(statistic=2.8453913149701906, pvalue=0.04661256496853713)

In [117]:
stats.ttest_ind([0.7377,0.7406,0.7416,0.7428,0.7445], [0.7395,0.7372,0.7514,0.7499,0.7484])

Ttest_indResult(statistic=-1.2356276663966228, pvalue=0.2516485029056008)

In [122]:
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_', 
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
res3106 = []
for m in range(2):
    for n in range(2):
        tres3106 = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']

            resauc = []
            for i in range(14):
#                 if i in [18, 23, 28, 31]:
#                     continue
                fpr, tpr, _ = roc_curve(test_y[:, i], pred_y[:, i])
                roc_auc = auc(fpr, tpr)
                resauc.append(roc_auc)
                
            
            tres3106.append(resauc)
        res3106.append(tres3106)

In [123]:
np.array(res3106).shape

(4, 5, 14)

In [124]:
np.array(res3106)

array([[[0.88558773, 0.82837235, 0.87432089, 0.80845188, 0.83322594,
         0.89923436, 0.85224886, 0.89549255, 0.85834918, 0.86279965,
         0.86778552, 0.82030095, 0.88647382, 0.83581046],
        [0.91350214, 0.85007062, 0.90776665, 0.90176712, 0.86299885,
         0.89766938, 0.87061877, 0.92415097, 0.86089824, 0.88816526,
         0.8760375 , 0.83149298, 0.89270727, 0.89969798],
        [0.90476728, 0.84260502, 0.89794745, 0.83011047, 0.85756158,
         0.886856  , 0.85617672, 0.90132237, 0.88709378, 0.88939266,
         0.87691736, 0.84802986, 0.8906857 , 0.8613677 ],
        [0.89483139, 0.84112812, 0.87130936, 0.90084514, 0.88894366,
         0.90568126, 0.85627738, 0.90353636, 0.89401735, 0.88600057,
         0.86175284, 0.83834565, 0.85985293, 0.83388115],
        [0.89728526, 0.84480593, 0.87232599, 0.85567348, 0.8790821 ,
         0.89772155, 0.85225944, 0.90515152, 0.90809627, 0.87628303,
         0.87240895, 0.84275245, 0.88743305, 0.88419656]],

       [[0.9255170

In [131]:
for i in range(4):
    for j in range(4):
        t, p = stats.ttest_ind(res3106[i],res3106[j], axis = None)
        print(i, j, t, p)

0 1 -6.834092542627083 2.430516210458465e-10
0 2 13.682742034418075 1.821938319497012e-27
0 3 -6.925773491212086 1.505348664605861e-10
1 2 17.585626541071637 4.713645927400066e-37
1 3 -0.0027180149633849173 0.9978352656543824
2 3 -17.6485417629732 3.3475837413283366e-37


In [132]:
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_', 
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res4802 = []
for m in range(2):
    for n in range(2):
        tres4802 = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']

            resauc = []
            for i in range(37):
                if i in [18, 23, 28, 31]:
                    continue
                fpr, tpr, _ = roc_curve(test_y[:, i], pred_y[:, i])
                roc_auc = auc(fpr, tpr)
                resauc.append(roc_auc)
                
            tres4802.append(resauc)
        res4802.append(tres4802)

In [134]:
for i in range(4):
    for j in range(4):
        t, p = stats.ttest_ind(res4802[i],res4802[j], axis = None)
        print(i, j, t, p)

0 0 0.0 1.0
0 1 -9.917222979625578 1.9024151613786604e-20
0 2 9.151725759488714 6.125901709638712e-18
0 3 -8.196880601326026 5.617768150368543e-15
1 0 9.917222979625578 1.9024151613786604e-20
1 1 0.0 1.0
1 2 19.01194640468356 7.432637645557658e-55
1 3 1.2428163888904986 0.21482321419622596
2 0 -9.151725759488714 6.125901709638712e-18
2 1 -19.01194640468356 7.432637645557658e-55
2 2 0.0 1.0
2 3 -17.077471764449076 3.1498292609453327e-47
3 0 8.196880601326026 5.617768150368543e-15
3 1 -1.2428163888904986 0.21482321419622596
3 2 17.077471764449076 3.1498292609453327e-47
3 3 0.0 1.0


In [None]:
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_', 
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
ap_res3106 = []
rl_res3106 = []
cov_res3106 = []
for m in range(2):
    for n in range(2):
        ap_tres = []
        rl_tres = []
        cov_tres = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']
            ap_t = []
            rl_t = []
            cov_t = []
            for i in range(20):
                r = np.random.choice(test_y.shape[0], 1000, replace=False)
                ap_t.append(avgprec(test_y[r, :], pred_y[r, :]))
                rl_t.append(label_ranking_loss(test_y[r, :], pred_y[r, :]))
                cov_t.append(coverage_error(test_y[r, :], pred_y[r, :]) - 1)
            ap_tres.append(ap_t)
            rl_tres.append(rl_t)
            cov_tres.append(cov_t)
        ap_res3106.append(ap_tres)
        rl_res3106.append(rl_tres)
        cov_res3106.append(cov_tres)

In [141]:
np.array(cov_res).shape

(4, 5, 20)

In [None]:
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_', 
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
ap_res4802 = []
rl_res4802 = []
cov_res4802 = []
for m in range(2):
    for n in range(2):
        ap_tres = []
        rl_tres = []
        cov_tres = []
        for c in range(5):
            filename = fname[m*2+n] +str(c) + '_' +  '9.mat'
            result = loadmat(filename)
            test_y = result['test_y']
            pred_y = result['pred_y']
            ap_t = []
            rl_t = []
            cov_t = []
            for i in range(20):
                r = np.random.choice(test_y.shape[0], 1000, replace=False)
                ap_t.append(avgprec(test_y[r, :], pred_y[r, :]))
                rl_t.append(label_ranking_loss(test_y[r, :], pred_y[r, :]))
                cov_t.append(coverage_error(test_y[r, :], pred_y[r, :]) - 1)
            ap_tres.append(ap_t)
            rl_tres.append(rl_t)
            cov_tres.append(cov_t)
        ap_res4802.append(ap_tres)
        rl_res4802.append(rl_tres)
        cov_res4802.append(cov_tres)