In [3]:
import numpy as np
import tensorflow as tf

from tensorflow import keras
from scipy.io import loadmat, savemat

from tensorflow.keras import layers

from sklearn.model_selection import KFold
from sklearn.metrics import label_ranking_average_precision_score as avgprec, coverage_error, label_ranking_loss

In [2]:
# Turn on memory growth for every GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # Currently, memory growth needs to be the same across GPUs,
    # so the flag is applied to each device in turn (no-op when the list is empty).
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Memory growth must be set before GPUs have been initialized;
    # report the problem instead of aborting the notebook.
    print(err)

In [48]:
# Ranking metrics on the "4802" result files: for each model, load the five
# files with index 0-4 and print one row per metric (one value per file).
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_',
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res = []
for model, prefix in zip(modelname, fname):
    print(model)
    ap_list, rl_list, ce_list = [], [], []
    for c in range(5):
        # File name pattern: <prefix><index>_9.mat
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']
        ap_list.append(avgprec(test_y, pred_y))
        rl_list.append(label_ranking_loss(test_y, pred_y))
        # Coverage error is reported shifted down by one.
        ce_list.append(coverage_error(test_y, pred_y) - 1)
    for heading, values in (('ap_list: ', ap_list), ('rl_list: ', rl_list), ('ce_list: ', ce_list)):
        print(heading + ' '.join(map('{:.4f}'.format, values)))

BLSTM
ap_list: 0.7013 0.6977 0.6847 0.6846 0.6797
rl_list: 0.0753 0.0774 0.0781 0.0820 0.0902
ce_list: 3.6082 3.7179 3.7107 3.8931 4.2766
BLSTM ConvNet1
ap_list: 0.7550 0.7475 0.7499 0.7559 0.7463
rl_list: 0.0555 0.0594 0.0627 0.0593 0.0607
ce_list: 2.7276 2.8560 3.0325 2.8659 2.9465
ConvNet2
ap_list: 0.6181 0.6192 0.6196 0.6161 0.6228
rl_list: 0.0694 0.0678 0.0691 0.0673 0.0678
ce_list: 3.3299 3.3123 3.3281 3.2559 3.3095
BLSTM ConvNet1 ConvNet2
ap_list: 0.7377 0.7406 0.7416 0.7428 0.7445
rl_list: 0.0641 0.0625 0.0649 0.0609 0.0662
ce_list: 3.0970 2.9896 3.1361 2.9197 3.1217


In [49]:
# Ranking metrics on the "3106" result files: for each model, load the five
# files with index 0-4 and print one row per metric (one value per file).
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_',
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res = []
for model, prefix in zip(modelname, fname):
    print(model)
    ap_list, rl_list, ce_list = [], [], []
    for c in range(5):
        # File name pattern: <prefix><index>_9.mat
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']
        ap_list.append(avgprec(test_y, pred_y))
        rl_list.append(label_ranking_loss(test_y, pred_y))
        # Coverage error is reported shifted down by one.
        ce_list.append(coverage_error(test_y, pred_y) - 1)
    for heading, values in (('ap_list: ', ap_list), ('rl_list: ', rl_list), ('ce_list: ', ce_list)):
        print(heading + ' '.join(map('{:.4f}'.format, values)))

BLSTM
ap_list: 0.7476 0.7640 0.7504 0.7452 0.7546
rl_list: 0.0965 0.0916 0.0973 0.1053 0.0929
ce_list: 1.5741 1.5098 1.6049 1.7230 1.5359
BLSTM ConvNet1
ap_list: 0.7910 0.7844 0.7853 0.7867 0.7906
rl_list: 0.0780 0.0804 0.0769 0.0793 0.0747
ce_list: 1.3342 1.3532 1.2970 1.3088 1.2636
ConvNet2
ap_list: 0.6324 0.6536 0.6452 0.6449 0.6377
rl_list: 0.1351 0.1254 0.1292 0.1302 0.1328
ce_list: 2.0873 1.9539 2.0140 2.0139 2.0621
BLSTM ConvNet1 ConvNet2
ap_list: 0.7903 0.7932 0.7967 0.7853 0.7849
rl_list: 0.0750 0.0751 0.0707 0.0780 0.0802
ce_list: 1.2681 1.2824 1.2115 1.3124 1.3496


In [55]:
from sklearn.metrics import roc_curve, auc

# Per-label ROC AUC on the "3106" result files: for each model, print one row
# per file (index 0-4) with the AUC of label columns 0-13.
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_',
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
res = []
for model, prefix in zip(modelname, fname):
    tres = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']
        # roc_curve returns (fpr, tpr, thresholds); only fpr and tpr feed auc().
        resauc = [
            auc(*roc_curve(test_y[:, i], pred_y[:, i])[:2])
            for i in range(14)
        ]
        tres.append(resauc)
    print(model)
    for row in tres:
        print(' '.join(map('{:.4f}'.format, row)))

BLSTM
0.8856 0.8284 0.8743 0.8085 0.8332 0.8992 0.8522 0.8955 0.8583 0.8628 0.8678 0.8203 0.8865 0.8358
0.9135 0.8501 0.9078 0.9018 0.8630 0.8977 0.8706 0.9242 0.8609 0.8882 0.8760 0.8315 0.8927 0.8997
0.9048 0.8426 0.8979 0.8301 0.8576 0.8869 0.8562 0.9013 0.8871 0.8894 0.8769 0.8480 0.8907 0.8614
0.8948 0.8411 0.8713 0.9008 0.8889 0.9057 0.8563 0.9035 0.8940 0.8860 0.8618 0.8383 0.8599 0.8339
0.8973 0.8448 0.8723 0.8557 0.8791 0.8977 0.8523 0.9052 0.9081 0.8763 0.8724 0.8428 0.8874 0.8842
BLSTM ConvNet1
0.9255 0.8794 0.9121 0.8854 0.8930 0.9081 0.8952 0.9203 0.9195 0.9228 0.8965 0.9134 0.9142 0.9039
0.9352 0.8834 0.9063 0.8708 0.8972 0.9047 0.8893 0.9381 0.8558 0.8931 0.8969 0.8647 0.9070 0.9379
0.9368 0.8828 0.9179 0.8630 0.8903 0.9225 0.8719 0.8992 0.9257 0.9196 0.8968 0.8438 0.9141 0.9327
0.9235 0.8779 0.9302 0.9230 0.8836 0.9127 0.9018 0.9192 0.9117 0.8917 0.8932 0.8864 0.9067 0.8998
0.9302 0.8802 0.9146 0.8582 0.8962 0.9140 0.9060 0.8877 0.9072 0.9000 0.8973 0.8560 0.9066 0.8696

In [57]:
# Per-label ROC AUC on the "4802" result files: for each model, print one row
# per file (index 0-4) with the AUC of every evaluated label column.
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_',
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res = []
for model, prefix in zip(modelname, fname):
    tres = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']
        # Columns 18, 23, 28 and 31 of the 37 are skipped.
        # NOTE(review): presumably AUC is not meaningful for them in these
        # files (e.g. no positive examples) - confirm and document the reason.
        # roc_curve returns (fpr, tpr, thresholds); only fpr and tpr feed auc().
        resauc = [
            auc(*roc_curve(test_y[:, i], pred_y[:, i])[:2])
            for i in range(37)
            if i not in [18, 23, 28, 31]
        ]
        tres.append(resauc)
    print(model)
    for row in tres:
        print(' '.join(map('{:.4f}'.format, row)))

BLSTM
0.8351 0.8524 0.6345 0.8781 0.8327 0.8001 0.8580 0.7618 0.8580 0.7423 0.8193 0.8873 0.8454 0.8225 0.8187 0.8850 0.8861 0.8528 0.8408 0.9279 0.6156 0.7901 0.8704 0.8026 0.8643 0.8755 0.8183 0.8804 0.7682 0.6300 0.6297 0.7253 0.6589
0.7831 0.8238 0.7148 0.8746 0.8124 0.7604 0.8209 0.6527 0.7944 0.7767 0.7855 0.8997 0.8262 0.8602 0.8606 0.8423 0.6561 0.8686 0.8896 0.7812 0.7934 0.7769 0.8658 0.7846 0.8454 0.8809 0.8455 0.8684 0.5384 0.3808 0.7072 0.7306 0.8589
0.8368 0.8714 0.5040 0.8760 0.8126 0.7665 0.8285 0.6578 0.7615 0.7395 0.7582 0.8957 0.8175 0.7689 0.8170 0.8859 0.8241 0.7511 0.7856 0.8377 0.6978 0.6791 0.8485 0.8161 0.8463 0.8760 0.7269 0.8762 0.7336 0.6274 0.6672 0.7770 0.8219
0.7311 0.8581 0.8915 0.8838 0.8266 0.7646 0.8176 0.8830 0.7884 0.7670 0.7976 0.8807 0.8205 0.8363 0.6439 0.8415 0.8343 0.8299 0.8131 0.7683 0.6775 0.7447 0.8572 0.7338 0.8512 0.8703 0.7826 0.8618 0.8470 0.8290 0.6571 0.9092 0.8299
0.7971 0.8168 0.8668 0.8815 0.8214 0.7742 0.7998 0.7592 0.8018 0.7265 

In [46]:
# Inspect the average-precision list left behind by the most recent metric loop.
ap_list

[0.701288576616281,
 0.6976541451223579,
 0.6846751393451417,
 0.6846242416996341,
 0.6797455851194014]

In [None]:
# NOTE(review): stand-alone copy of the three append statements used inside the
# metric loops. The cell has no execution count (In [None]); if it is run it
# appends one extra value to each list using whatever test_y / pred_y are
# currently loaded. Consider deleting it.
ap_list.append(avgprec(test_y, pred_y))
rl_list.append(label_ranking_loss(test_y, pred_y))
ce_list.append(coverage_error(test_y, pred_y) - 1)

In [5]:
# Load a single result file (prefix 'result_4802_', index 0) for ad-hoc inspection.
tmp = loadmat('4802_result/result_4802_0_9.mat')

In [6]:
# Pull the two arrays used by every metric call out of the loaded .mat dict.
# This rebinds the global test_y / pred_y that the metric loops also assign.
test_y = tmp['test_y']
pred_y = tmp['pred_y']

In [37]:
# Shape of the prediction array currently bound to pred_y.
pred_y.shape

(11190, 37)

In [10]:
# Label-ranking average precision for the arrays currently bound to test_y / pred_y.
avgprec(test_y, pred_y)

0.701288576616281

In [58]:
# Spread of average precision under random sub-sampling of the currently
# loaded test_y / pred_y: 10 printed rows, each holding the AP of 20 random
# subsets of up to 1000 rows drawn without replacement.
#
# Changes from the earlier version of this cell:
#   * the row count comes from test_y instead of the hard-coded 11190, so the
#     cell works for whichever result file is loaded (the other sub-sampling
#     cells already use test_y.shape[0]);
#   * a locally seeded RandomState makes the printed numbers reproducible on
#     a fresh kernel without touching NumPy's global random state;
#   * the subset size is capped at the number of available rows, because
#     sampling without replacement cannot draw more rows than exist.
n_rows = test_y.shape[0]
sample_size = min(1000, n_rows)
rng = np.random.RandomState(0)
for _ in range(10):
    t = []
    for i in range(20):
        r = rng.choice(n_rows, sample_size, replace=False)
        t.append(avgprec(test_y[r, :], pred_y[r, :]))
    print(' '.join('{:.4f}'.format(x) for x in t))

0.7228 0.7448 0.7462 0.7458 0.7424 0.7489 0.7394 0.7508 0.7412 0.7327 0.7453 0.7346 0.7413 0.7286 0.7536 0.7493 0.7466 0.7565 0.7492 0.7424
0.7552 0.7531 0.7505 0.7341 0.7487 0.7324 0.7219 0.7430 0.7486 0.7414 0.7313 0.7478 0.7514 0.7571 0.7390 0.7358 0.7448 0.7281 0.7459 0.7481
0.7400 0.7311 0.7405 0.7411 0.7272 0.7534 0.7521 0.7659 0.7541 0.7460 0.7590 0.7189 0.7398 0.7470 0.7533 0.7458 0.7413 0.7343 0.7390 0.7289
0.7441 0.7370 0.7563 0.7417 0.7602 0.7364 0.7584 0.7395 0.7369 0.7431 0.7380 0.7253 0.7509 0.7368 0.7412 0.7496 0.7449 0.7474 0.7510 0.7610
0.7436 0.7508 0.7486 0.7568 0.7383 0.7348 0.7527 0.7571 0.7344 0.7714 0.7405 0.7393 0.7454 0.7126 0.7370 0.7307 0.7417 0.7540 0.7264 0.7268
0.7508 0.7665 0.7417 0.7565 0.7398 0.7453 0.7483 0.7296 0.7431 0.7230 0.7378 0.7268 0.7484 0.7477 0.7268 0.7356 0.7478 0.7557 0.7332 0.7645
0.7182 0.7399 0.7370 0.7549 0.7551 0.7432 0.7439 0.7350 0.7518 0.7237 0.7469 0.7666 0.7398 0.7547 0.7558 0.7650 0.7465 0.7445 0.7426 0.7426
0.7529 0.7512 0.7435

In [62]:
# Sub-sampled average precision on the "3106" result files.
# For each model and each file (index 0-4), draw 20 random subsets of up to
# 1000 rows without replacement and record the AP of each subset; print one
# row of 20 values per file.
#
# A locally seeded RandomState keeps the numbers reproducible across kernel
# restarts without altering NumPy's global random state, and the subset size
# is capped at the number of rows so that sampling without replacement cannot
# fail on a small file.
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_',
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
res = []
rng = np.random.RandomState(0)
for model, prefix in zip(modelname, fname):
    tres = []  # one list of 20 AP values per file; read by later cells
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y = result['test_y']
        pred_y = result['pred_y']
        n_rows = test_y.shape[0]
        sample_size = min(1000, n_rows)
        ap_samples = []
        for i in range(20):
            r = rng.choice(n_rows, sample_size, replace=False)
            ap_samples.append(avgprec(test_y[r, :], pred_y[r, :]))
        tres.append(ap_samples)
    print(model)
    # `t` is kept as the loop name because a later inspection cell reads it.
    for t in tres:
        print(' '.join(['{:.4f}'.format(x) for x in t]))

BLSTM
0.7675 0.7363 0.7269 0.7621 0.7420 0.7303 0.7447 0.7450 0.7346 0.7469 0.7533 0.7197 0.7552 0.7522 0.7491 0.7333 0.7531 0.7354 0.7467 0.7511
0.7713 0.7728 0.7689 0.7591 0.7489 0.7722 0.7666 0.7716 0.7482 0.7608 0.7461 0.7544 0.7628 0.7517 0.7577 0.7659 0.7738 0.7708 0.7659 0.7612
0.7560 0.7435 0.7454 0.7557 0.7544 0.7714 0.7486 0.7527 0.7492 0.7335 0.7561 0.7338 0.7552 0.7618 0.7501 0.7542 0.7397 0.7402 0.7489 0.7514
0.7599 0.7309 0.7461 0.7449 0.7296 0.7391 0.7529 0.7452 0.7471 0.7437 0.7446 0.7365 0.7464 0.7325 0.7459 0.7307 0.7534 0.7336 0.7291 0.7237
0.7592 0.7494 0.7451 0.7565 0.7510 0.7432 0.7473 0.7668 0.7388 0.7334 0.7628 0.7576 0.7561 0.7545 0.7598 0.7617 0.7462 0.7465 0.7609 0.7527
BLSTM ConvNet1
0.7906 0.8035 0.7988 0.8149 0.7839 0.7944 0.8068 0.7955 0.7991 0.7819 0.7910 0.8051 0.7972 0.7916 0.7852 0.8047 0.8009 0.7872 0.7848 0.7928
0.7721 0.7824 0.7814 0.7724 0.7758 0.7915 0.7756 0.7871 0.7946 0.7802 0.7718 0.7772 0.7904 0.7866 0.7833 0.7846 0.7822 0.7715 0.8003 0.7713

In [64]:
# Sub-sampled average precision on the "4802" result files.
# For each model and each file (index 0-4), draw 20 random subsets of up to
# 1000 rows without replacement and record the AP of each subset; print one
# row of 20 values per file.
#
# A locally seeded RandomState keeps the numbers reproducible across kernel
# restarts without altering NumPy's global random state, and the subset size
# is capped at the number of rows so that sampling without replacement cannot
# fail on a small file.
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_',
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
res = []
rng = np.random.RandomState(0)
for model, prefix in zip(modelname, fname):
    tres = []  # one list of 20 AP values per file; read by later cells
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y = result['test_y']
        pred_y = result['pred_y']
        n_rows = test_y.shape[0]
        sample_size = min(1000, n_rows)
        ap_samples = []
        for i in range(20):
            r = rng.choice(n_rows, sample_size, replace=False)
            ap_samples.append(avgprec(test_y[r, :], pred_y[r, :]))
        tres.append(ap_samples)
    print(model)
    # `t` is kept as the loop name because a later inspection cell reads it.
    for t in tres:
        print(' '.join(['{:.4f}'.format(x) for x in t]))

BLSTM
0.7128 0.7125 0.6996 0.6951 0.7025 0.6897 0.7005 0.6928 0.6891 0.6877 0.7036 0.6904 0.6879 0.6850 0.6956 0.7090 0.7204 0.6953 0.6896 0.7061
0.7005 0.6971 0.6995 0.6848 0.7130 0.7152 0.6821 0.7185 0.7160 0.6983 0.6890 0.6942 0.6881 0.7111 0.7050 0.7000 0.6943 0.7073 0.7030 0.6858
0.6724 0.6796 0.6641 0.6925 0.6739 0.6817 0.6797 0.6792 0.6962 0.6909 0.6832 0.6910 0.6851 0.6867 0.6849 0.6846 0.6745 0.6974 0.6788 0.6888
0.6867 0.6673 0.6786 0.6986 0.6925 0.6967 0.6829 0.6956 0.6941 0.6770 0.6875 0.6841 0.6930 0.6567 0.6819 0.6738 0.6734 0.6931 0.6767 0.6735
0.6811 0.6770 0.6982 0.6826 0.6754 0.6802 0.6737 0.6722 0.6797 0.6532 0.6839 0.6765 0.6756 0.6798 0.6816 0.6894 0.6874 0.6824 0.6748 0.6776
BLSTM ConvNet1
0.7451 0.7408 0.7578 0.7650 0.7476 0.7435 0.7491 0.7531 0.7463 0.7587 0.7636 0.7697 0.7722 0.7670 0.7634 0.7428 0.7499 0.7532 0.7713 0.7642
0.7510 0.7403 0.7473 0.7293 0.7429 0.7665 0.7482 0.7448 0.7536 0.7442 0.7602 0.7486 0.7463 0.7394 0.7502 0.7492 0.7490 0.7670 0.7523 0.7532

In [60]:
# Shape of the label array currently bound to test_y.
test_y.shape

(7220, 14)

In [42]:
# Smallest value in `t`, i.e. in whichever list was last bound to that name
# (the sub-sampling cells leave the final row of `tres` in it).
np.amin(t)

0.6745576023041384

In [68]:
# Toy example for scipy's independent two-sample t-test.
#
# `stats` is first used in this cell, but the notebook only imported it in a
# much later cell, so a top-to-bottom run on a fresh kernel failed here with
# NameError. Importing it locally makes the cell self-contained.
from scipy import stats

a = [1, 2, 3, 4, 5]
b = [2, 2, 2, 2, 2]

# t: test statistic, p: two-sided p-value. Later cells read a, t and p.
t, p = stats.ttest_ind(a, b)

In [69]:
# Test statistic currently bound to `t`.
t

1.414213562373095

In [70]:
# p-value currently bound to `p`.
p

0.19501552810007572

In [74]:
# One-sample t-test of the toy list `a` against the value 2; the result is
# displayed, not stored.
stats.ttest_1samp(a,2)

Ttest_1sampResult(statistic=1.414213562373095, pvalue=0.23019964108049873)

In [72]:
# NOTE(review): the recorded output of this cell equals the p-value of the
# one-sample test on `a` against 2, not its statistic, so `t` and `p` appear
# to have been assigned in swapped order by a cell that is no longer present.
t

0.23019964108049873

In [73]:
# NOTE(review): the recorded output of this cell equals the test statistic of
# the one-sample test on `a` against 2, not its p-value - see the swapped
# assignment suspected for `t`; re-run after fixing the assignment order.
p

1.414213562373095

In [95]:
# One-sample t-test of the toy list `a` against the value 1, unpacked as
# (statistic, p-value).
t, p = stats.ttest_1samp(a,1)

In [96]:
# p-value currently bound to `p`.
p

0.04742065558431962

In [97]:
# Test statistic currently bound to `t`.
t

2.82842712474619

In [100]:
# For every row of sub-sampled AP values in `tres`, print the row mean and a
# one-sample t-test of the row against 0.6916.
# NOTE(review): 0.6916 matches the 'FSVM-KNR' entry of ap_old4802 defined
# elsewhere in this notebook - presumably that method's reference AP; confirm
# and replace the literal with a named value.
for ap_row in tres:
    row_mean = np.average(ap_row)
    print(row_mean)
    row_test = stats.ttest_1samp(ap_row, 0.6916)
    print(row_test)

0.7392558215382585
Ttest_1sampResult(statistic=16.425577087730836, pvalue=1.1032537454556298e-12)
0.7401285413183635
Ttest_1sampResult(statistic=19.13977386991606, pvalue=7.071347429729854e-14)
0.7419374437613769
Ttest_1sampResult(statistic=23.987157121407098, pvalue=1.142218887410092e-15)
0.740900607128036
Ttest_1sampResult(statistic=20.970421738305454, pvalue=1.3435574169367129e-14)
0.7482548216821112
Ttest_1sampResult(statistic=25.04507846932193, pvalue=5.152557611326228e-16)


In [101]:
# Grand mean over every value in `tres` (no axis given, so all rows are pooled).
np.average(tres)

0.7420954470856294

In [116]:
# One-sample t-test of five hard-coded AP values against 0.7382.
# The five values equal the rounded row printed for 'BLSTM ConvNet1 ConvNet2'
# on the 4802 files. NOTE(review): the origin of 0.7382 is not shown in this
# notebook - document it.
stats.ttest_1samp([0.7377,0.7406,0.7416,0.7428,0.7445],0.7382)

Ttest_1sampResult(statistic=2.8453913149701906, pvalue=0.04661256496853713)

In [117]:
# Independent two-sample t-test between two hard-coded lists of five values.
# The first list equals the rounded AP row printed for 'BLSTM ConvNet1 ConvNet2'
# on the 4802 files. NOTE(review): the origin of the second list is not shown
# in this notebook - document it.
stats.ttest_ind([0.7377,0.7406,0.7416,0.7428,0.7445], [0.7395,0.7372,0.7514,0.7499,0.7484])

Ttest_indResult(statistic=-1.2356276663966228, pvalue=0.2516485029056008)

In [79]:
from sklearn.metrics import roc_curve, auc

In [80]:
# Collect per-label ROC AUC on the "3106" result files into
# res3106[model][file][label] (4 models x 5 files x 14 label columns).
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_',
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
res3106 = []
for prefix in fname:
    tres3106 = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']
        # roc_curve returns (fpr, tpr, thresholds); only fpr and tpr feed auc().
        resauc = [
            auc(*roc_curve(test_y[:, i], pred_y[:, i])[:2])
            for i in range(14)
        ]
        tres3106.append(resauc)
    res3106.append(tres3106)

In [81]:
# Sanity check on the nesting of res3106 (models, files, labels).
np.array(res3106).shape

(4, 5, 14)

In [88]:
# For each of the four models print two rows: the per-label mean of the AUCs
# over the five files, then the per-label standard deviation (np.std with its
# default settings).
for i in range(4):
    model_aucs = res3106[i]
    for reducer in (np.average, np.std):
        print(' '.join(map('{:.4f}'.format, reducer(model_aucs, axis = 0))))

0.8992 0.8414 0.8847 0.8594 0.8644 0.8974 0.8575 0.9059 0.8817 0.8805 0.8710 0.8362 0.8834 0.8630
0.0094 0.0072 0.0152 0.0374 0.0192 0.0061 0.0068 0.0097 0.0193 0.0100 0.0056 0.0096 0.0120 0.0260
0.9302 0.8807 0.9163 0.8801 0.8921 0.9124 0.8928 0.9129 0.9040 0.9055 0.8962 0.8728 0.9097 0.9088
0.0052 0.0021 0.0080 0.0233 0.0049 0.0060 0.0119 0.0176 0.0249 0.0132 0.0015 0.0246 0.0036 0.0247
0.8222 0.6473 0.7812 0.7104 0.7055 0.8526 0.6934 0.8244 0.8246 0.8103 0.7611 0.7555 0.7832 0.7261
0.0112 0.0111 0.0163 0.0277 0.0131 0.0177 0.0084 0.0191 0.0320 0.0153 0.0071 0.0228 0.0012 0.0155
0.9282 0.8815 0.9084 0.8838 0.8939 0.9235 0.8892 0.9239 0.8924 0.9113 0.8982 0.8954 0.9101 0.8749
0.0092 0.0053 0.0091 0.0171 0.0071 0.0063 0.0110 0.0263 0.0173 0.0052 0.0013 0.0204 0.0041 0.0166


In [82]:
# NOTE(review): displays the complete (4, 5, 14) array; consider showing a
# slice or a summary instead, as the full dump makes the saved notebook large.
np.array(res3106)

array([[[0.88558773, 0.82837235, 0.87432089, 0.80845188, 0.83322594,
         0.89923436, 0.85224886, 0.89549255, 0.85834918, 0.86279965,
         0.86778552, 0.82030095, 0.88647382, 0.83581046],
        [0.91350214, 0.85007062, 0.90776665, 0.90176712, 0.86299885,
         0.89766938, 0.87061877, 0.92415097, 0.86089824, 0.88816526,
         0.8760375 , 0.83149298, 0.89270727, 0.89969798],
        [0.90476728, 0.84260502, 0.89794745, 0.83011047, 0.85756158,
         0.886856  , 0.85617672, 0.90132237, 0.88709378, 0.88939266,
         0.87691736, 0.84802986, 0.8906857 , 0.8613677 ],
        [0.89483139, 0.84112812, 0.87130936, 0.90084514, 0.88894366,
         0.90568126, 0.85627738, 0.90353636, 0.89401735, 0.88600057,
         0.86175284, 0.83834565, 0.85985293, 0.83388115],
        [0.89728526, 0.84480593, 0.87232599, 0.85567348, 0.8790821 ,
         0.89772155, 0.85225944, 0.90515152, 0.90809627, 0.87628303,
         0.87240895, 0.84275245, 0.88743305, 0.88419656]],

       [[0.9255170

In [131]:
# Independent two-sample t-test between every ordered pair of models, pooling
# all per-file, per-label AUC values of a model (axis=None flattens them).
# NOTE(review): the saved output of this cell lists only pairs with i < j, so
# it was produced by a different version of this loop; re-run to refresh it.
pairs = [(i, j) for i in range(4) for j in range(4)]
for i, j in pairs:
    t, p = stats.ttest_ind(res3106[i], res3106[j], axis = None)
    print(i, j, t, p)

0 1 -6.834092542627083 2.430516210458465e-10
0 2 13.682742034418075 1.821938319497012e-27
0 3 -6.925773491212086 1.505348664605861e-10
1 2 17.585626541071637 4.713645927400066e-37
1 3 -0.0027180149633849173 0.9978352656543824
2 3 -17.6485417629732 3.3475837413283366e-37


In [89]:
# Collect per-label ROC AUC on the "4802" result files into
# res4802[model][file][label] (4 models x 5 files x 33 evaluated labels).
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_',
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
res4802 = []
for prefix in fname:
    tres4802 = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']
        # Columns 18, 23, 28 and 31 of the 37 are skipped.
        # NOTE(review): presumably AUC is not meaningful for them in these
        # files (e.g. no positive examples) - confirm and document the reason.
        # roc_curve returns (fpr, tpr, thresholds); only fpr and tpr feed auc().
        resauc = [
            auc(*roc_curve(test_y[:, i], pred_y[:, i])[:2])
            for i in range(37)
            if i not in [18, 23, 28, 31]
        ]
        tres4802.append(resauc)
    res4802.append(tres4802)

In [90]:
# Sanity check on the nesting of res4802 (models, files, evaluated labels).
np.array(res4802).shape

(4, 5, 33)

In [91]:
# For each of the four models print two rows: the per-label mean of the AUCs
# over the five files, then the per-label standard deviation (np.std with its
# default settings).
for i in range(4):
    model_aucs = res4802[i]
    for reducer in (np.average, np.std):
        print(' '.join(map('{:.4f}'.format, reducer(model_aucs, axis = 0))))

0.7966 0.8445 0.7223 0.8788 0.8211 0.7732 0.8250 0.7429 0.8008 0.7504 0.7884 0.8908 0.8221 0.7777 0.7217 0.8710 0.7537 0.8212 0.8251 0.8269 0.6855 0.7595 0.8573 0.7694 0.8502 0.8747 0.7976 0.8713 0.6958 0.6371 0.6592 0.7779 0.7650
0.0389 0.0209 0.1449 0.0034 0.0079 0.0142 0.0190 0.0844 0.0317 0.0186 0.0201 0.0066 0.0144 0.0935 0.1470 0.0243 0.1208 0.0414 0.0373 0.0564 0.0609 0.0451 0.0098 0.0409 0.0075 0.0039 0.0406 0.0064 0.1141 0.1478 0.0277 0.0681 0.0889
0.8852 0.9017 0.8749 0.9244 0.8599 0.8771 0.8876 0.8969 0.8712 0.8604 0.8655 0.9017 0.8786 0.8124 0.8138 0.9194 0.8703 0.9027 0.8955 0.8820 0.8745 0.8714 0.8955 0.8729 0.8789 0.8996 0.8903 0.8991 0.7595 0.8590 0.8833 0.8768 0.7916
0.0467 0.0195 0.0680 0.0080 0.0025 0.0250 0.0197 0.0623 0.0144 0.0174 0.0059 0.0136 0.0051 0.0749 0.1028 0.0263 0.0612 0.0351 0.0088 0.0543 0.0403 0.0330 0.0064 0.0123 0.0069 0.0020 0.0160 0.0044 0.1484 0.0556 0.0244 0.0222 0.0938
0.7755 0.7628 0.6950 0.8170 0.6579 0.6445 0.7443 0.6431 0.7262 0.6081 0.6512

In [134]:
# Independent two-sample t-test between every ordered pair of models, pooling
# all per-file, per-label AUC values of a model (axis=None flattens them).
# Self-pairs and mirrored pairs are included, so the output holds 16 rows.
pairs = [(i, j) for i in range(4) for j in range(4)]
for i, j in pairs:
    t, p = stats.ttest_ind(res4802[i], res4802[j], axis = None)
    print(i, j, t, p)

0 0 0.0 1.0
0 1 -9.917222979625578 1.9024151613786604e-20
0 2 9.151725759488714 6.125901709638712e-18
0 3 -8.196880601326026 5.617768150368543e-15
1 0 9.917222979625578 1.9024151613786604e-20
1 1 0.0 1.0
1 2 19.01194640468356 7.432637645557658e-55
1 3 1.2428163888904986 0.21482321419622596
2 0 -9.151725759488714 6.125901709638712e-18
2 1 -19.01194640468356 7.432637645557658e-55
2 2 0.0 1.0
2 3 -17.077471764449076 3.1498292609453327e-47
3 0 8.196880601326026 5.617768150368543e-15
3 1 -1.2428163888904986 0.21482321419622596
3 2 17.077471764449076 3.1498292609453327e-47
3 3 0.0 1.0


In [142]:
# Sub-sampled ranking metrics on the "3106" result files.
# For each model and each file (index 0-4), draw 20 random subsets of up to
# 1000 rows without replacement and record average precision, ranking loss
# and (coverage error - 1) for each subset. Results are nested as
# <metric>_res3106[model][file][subset].
#
# A locally seeded RandomState keeps the numbers (and the t-tests computed
# from them) reproducible across kernel restarts without altering NumPy's
# global random state; the subset size is capped at the number of rows so
# that sampling without replacement cannot fail on a small file.
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_',
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
ap_res3106 = []
rl_res3106 = []
cov_res3106 = []
rng = np.random.RandomState(0)
for prefix in fname:
    ap_tres = []
    rl_tres = []
    cov_tres = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y = result['test_y']
        pred_y = result['pred_y']
        n_rows = test_y.shape[0]
        sample_size = min(1000, n_rows)
        ap_t = []
        rl_t = []
        cov_t = []
        for i in range(20):
            r = rng.choice(n_rows, sample_size, replace=False)
            # Slice once and reuse the same subset for all three metrics.
            sub_true = test_y[r, :]
            sub_pred = pred_y[r, :]
            ap_t.append(avgprec(sub_true, sub_pred))
            rl_t.append(label_ranking_loss(sub_true, sub_pred))
            cov_t.append(coverage_error(sub_true, sub_pred) - 1)
        ap_tres.append(ap_t)
        rl_tres.append(rl_t)
        cov_tres.append(cov_t)
    ap_res3106.append(ap_tres)
    rl_res3106.append(rl_tres)
    cov_res3106.append(cov_tres)

In [145]:
# For each metric (AP, ranking loss, coverage) run an independent two-sample
# t-test between every ordered pair of models, pooling all values of a model
# (axis=None flattens them). A row of asterisks separates the metrics.
pairs = [(i, j) for i in range(4) for j in range(4)]
for metric_values in (ap_res3106, rl_res3106, cov_res3106):
    print('*********')
    for i, j in pairs:
        t, p = stats.ttest_ind(metric_values[i], metric_values[j], axis = None)
        print(i, j, t, p)

*********
0 0 0.0 1.0
0 1 -24.3248689538197 2.1684970623894778e-61
0 2 63.3824190699687 1.8832847608287137e-133
0 3 -24.04105317218826 1.2341547231479016e-60
1 0 24.3248689538197 2.1684970623894778e-61
1 1 0.0 1.0
1 2 90.05202914409593 1.2624579884093112e-162
1 3 -1.9464386589745981 0.053017009007213185
2 0 -63.3824190699687 1.8832847608287137e-133
2 1 -90.05202914409593 1.2624579884093112e-162
2 2 0.0 1.0
2 3 -85.68176657130992 1.8727962826678633e-158
3 0 24.04105317218826 1.2341547231479016e-60
3 1 1.9464386589745981 0.053017009007213185
3 2 85.68176657130992 1.8727962826678633e-158
3 3 0.0 1.0
*********
0 0 0.0 1.0
0 1 22.054059879355385 3.174406309203209e-55
0 2 -38.02756224172806 5.938878008326005e-93
0 3 23.094629914559757 4.380896219567693e-58
1 0 -22.054059879355385 3.174406309203209e-55
1 1 0.0 1.0
1 2 -64.91832227886557 2.0421106228606457e-135
1 3 2.619870279673447 0.009477990246179575
2 0 38.02756224172806 5.938878008326005e-93
2 1 64.91832227886557 2.0421106228606457e-135
2

In [143]:
# Sub-sampled ranking metrics on the "4802" result files.
# For each model and each file (index 0-4), draw 20 random subsets of up to
# 1000 rows without replacement and record average precision, ranking loss
# and (coverage error - 1) for each subset. Results are nested as
# <metric>_res4802[model][file][subset].
#
# A locally seeded RandomState keeps the numbers (and the t-tests computed
# from them) reproducible across kernel restarts without altering NumPy's
# global random state; the subset size is capped at the number of rows so
# that sampling without replacement cannot fail on a small file.
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_',
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
ap_res4802 = []
rl_res4802 = []
cov_res4802 = []
rng = np.random.RandomState(0)
for prefix in fname:
    ap_tres = []
    rl_tres = []
    cov_tres = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y = result['test_y']
        pred_y = result['pred_y']
        n_rows = test_y.shape[0]
        sample_size = min(1000, n_rows)
        ap_t = []
        rl_t = []
        cov_t = []
        for i in range(20):
            r = rng.choice(n_rows, sample_size, replace=False)
            # Slice once and reuse the same subset for all three metrics.
            sub_true = test_y[r, :]
            sub_pred = pred_y[r, :]
            ap_t.append(avgprec(sub_true, sub_pred))
            rl_t.append(label_ranking_loss(sub_true, sub_pred))
            cov_t.append(coverage_error(sub_true, sub_pred) - 1)
        ap_tres.append(ap_t)
        rl_tres.append(rl_t)
        cov_tres.append(cov_t)
    ap_res4802.append(ap_tres)
    rl_res4802.append(rl_tres)
    cov_res4802.append(cov_tres)

In [146]:
# For each metric (AP, ranking loss, coverage) run an independent two-sample
# t-test between every ordered pair of models, pooling all values of a model
# (axis=None flattens them). A row of asterisks separates the metrics.
pairs = [(i, j) for i in range(4) for j in range(4)]
for metric_values in (ap_res4802, rl_res4802, cov_res4802):
    print('*********')
    for i, j in pairs:
        t, p = stats.ttest_ind(metric_values[i], metric_values[j], axis = None)
        print(i, j, t, p)

*********
0 0 0.0 1.0
0 1 -36.14688514559577 3.87807376502247e-89
0 2 40.82023189766455 2.323659385373657e-98
0 3 -27.89248122943891 1.64620468073748e-70
1 0 36.14688514559577 3.87807376502247e-89
1 1 0.0 1.0
1 2 83.56514254182495 2.321765690139202e-156
1 3 6.978306098563892 4.391776047750927e-11
2 0 -40.82023189766455 2.323659385373657e-98
2 1 -83.56514254182495 2.321765690139202e-156
2 2 0.0 1.0
2 3 -71.9737242663102 6.277424635854483e-144
3 0 27.89248122943891 1.64620468073748e-70
3 1 -6.978306098563892 4.391776047750927e-11
3 2 71.9737242663102 6.277424635854483e-144
3 3 0.0 1.0
*********
0 0 0.0 1.0
0 1 24.220026934891965 4.1173840429148696e-61
0 2 15.081116089387837 1.0046809878369223e-34
0 3 18.716271572250797 1.1383332073385183e-45
1 0 -24.220026934891965 4.1173840429148696e-61
1 1 0.0 1.0
1 2 -15.260455589886567 2.839682138725878e-35
1 3 -6.507226469220514 6.134999808833241e-10
2 0 -15.081116089387837 1.0046809878369223e-34
2 1 15.260455589886567 2.839682138725878e-35
2 2 0.0 

In [4]:
# AP values keyed by method name.
# NOTE(review): presumably reference results of earlier methods on this
# data set - confirm the source and cite it.
ap_old = {'IMMMLGP' : 0.5810, 'Hum-mPloc' : 0.5790, 'MKSVM' : 0.7065, 'FSVM-KNR' : 0.7108}

# Whole-file ranking metrics on the "3106" result files, nested as
# <metric>_res3106[model][file]: one value per file instead of the lists of
# sub-sampled values that the sub-sampling cell stores under the same names.
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_',
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']
ap_res3106, rl_res3106, cov_res3106 = [], [], []
for prefix in fname:
    ap_tres, rl_tres, cov_tres = [], [], []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']

        ap_tres.append(avgprec(test_y, pred_y))
        rl_tres.append(label_ranking_loss(test_y, pred_y))
        # Coverage error is stored shifted down by one.
        cov_tres.append(coverage_error(test_y, pred_y) - 1)
    ap_res3106.append(ap_tres)
    rl_res3106.append(rl_tres)
    cov_res3106.append(cov_tres)

In [1]:
# AP values keyed by method name, echoed one "name value" pair per line.
ap_old = {'IMMMLGP' : 0.5810, 'Hum-mPloc' : 0.5790, 'MKSVM' : 0.7065, 'FSVM-KNR' : 0.7108}
for method in ap_old:
    print(method, ap_old[method])

IMMMLGP 0.581
Hum-mPloc 0.579
MKSVM 0.7065
FSVM-KNR 0.7108


In [5]:
# Sanity check on the nesting of ap_res3106 (models, files).
np.array(ap_res3106).shape

(4, 5)

In [7]:
# Grand mean of AP over every model and file (no axis given, so all values are pooled).
np.average(ap_res3106)

0.7432027533265829

In [11]:
from scipy import stats

# One-sample t-test of each model's average-precision values against every
# reference score in ap_old. modelname and ap_res3106 both hold four entries.
for method, ref_score in ap_old.items():
    print(method)
    for model, model_scores in zip(modelname, ap_res3106):
        t_stat, p_val = stats.ttest_1samp(model_scores, ref_score)
        print(model, t_stat, p_val)

IMMMLGP
BLSTM 51.80217129366279 8.31153477297089e-07
BLSTM ConvNet1 152.4489829670825 1.1105255536435559e-08
ConvNet2 17.06681620391189 6.912991022870944e-05
BLSTM ConvNet1 ConvNet2 92.02010406598485 8.361390853319203e-08
Hum-mPloc
BLSTM 52.406808453572 7.935000338208905e-07
BLSTM ConvNet1 153.92469807119798 1.0685523832418423e-08
ConvNet2 17.619387936816118 6.09422852217665e-05
BLSTM ConvNet1 ConvNet2 92.90034745188488 8.049084712168514e-08
MKSVM
BLSTM 13.861189509359894 0.00015704651101538906
BLSTM ConvNet1 59.84786018383603 4.668193298788366e-07
ConvNet2 -17.60706003582826 6.111132796158663e-05
BLSTM ConvNet1 ConvNet2 36.784831600757336 3.2609149636446668e-06
FSVM-KNR
BLSTM 12.561219615555103 0.00023114982498879204
BLSTM ConvNet1 56.675072709987774 5.80339407425591e-07
ConvNet2 -18.795089261572336 4.7186936105740157e-05
BLSTM ConvNet1 ConvNet2 34.892308321072264 4.025854374104958e-06


In [12]:
# Reference average-precision values for the 4802 data, keyed by method name
# (presumably from previously published methods -- confirm the source).
ap_old4802 = {
    'IMMMLGP': 0.5725,
    'Hum-mPloc': 0.5644,
    'MKSVM': 0.6889,
    'FSVM-KNR': 0.6916,
    'mGOF-loc': 0.6482,
}
fname = [
    '4802_result/result_4802_',
    '4802_result/result_large-reg-4802_',
    '4802_result/result_pssm-conv-4802_',
    'encoding_pssm_4802/new_encoding_pssm_4802_',
]
# Display names of the four models, in the same order as `fname`.
modelname = ['BLSTM', 'BLSTM ConvNet1', 'ConvNet2', 'BLSTM ConvNet1 ConvNet2']

# Outer index: model (entry of `fname`); inner index: result file 0-4.
ap_res4802, rl_res4802, cov_res4802 = [], [], []
for prefix in fname:
    ap_tres, rl_tres, cov_tres = [], [], []
    for c in range(5):
        filename = prefix + str(c) + '_9.mat'
        result = loadmat(filename)
        test_y, pred_y = result['test_y'], result['pred_y']

        ap_tres.append(avgprec(test_y, pred_y))
        rl_tres.append(label_ranking_loss(test_y, pred_y))
        # The coverage error is stored with 1 subtracted.
        cov_tres.append(coverage_error(test_y, pred_y) - 1)
    ap_res4802.append(ap_tres)
    rl_res4802.append(rl_tres)
    cov_res4802.append(cov_tres)

In [13]:
# Sanity check: dimensions of the nested average-precision result list.
np.shape(ap_res4802)

(4, 5)

In [14]:
# One-sample t-test of each model's average-precision values against every
# reference score in ap_old4802. modelname and ap_res4802 both hold four entries.
for method, ref_score in ap_old4802.items():
    print(method)
    for model, model_scores in zip(modelname, ap_res4802):
        t_stat, p_val = stats.ttest_1samp(model_scores, ref_score)
        print(model, t_stat, p_val)

IMMMLGP
BLSTM 28.08756912267197 9.559463768055596e-06
BLSTM ConvNet1 91.40625864042781 8.588178590369135e-08
ConvNet2 43.113914509118345 1.7303159911059095e-06
BLSTM ConvNet1 ConvNet2 150.10517605522958 1.1815173104228164e-08
Hum-mPloc
BLSTM 30.03047342787238 7.323165816150988e-06
BLSTM ConvNet1 95.55641101694408 7.191093148906346e-08
ConvNet2 50.59747922919599 9.130733017014628e-07
BLSTM ConvNet1 ConvNet2 157.30207410441017 9.79707545597549e-09
MKSVM
BLSTM 0.16731466275498588 0.8752405149044679
BLSTM ConvNet1 31.76703189641611 5.853034286831729e-06
ConvNet2 -64.4276822090345 3.476674519933665e-07
BLSTM ConvNet1 ConvNet2 46.68308557070823 1.2594637308697596e-06
FSVM-KNR
BLSTM -0.4803201056451592 0.6560869644893341
BLSTM ConvNet1 30.383647770910667 6.989752916419787e-06
ConvNet2 -66.92220378239375 2.9869236004883306e-07
BLSTM ConvNet1 ConvNet2 44.284119554314664 1.5548348165254664e-06
mGOF-loc
BLSTM 9.929809134564442 0.0005775372497096435
BLSTM ConvNet1 52.620266677183075 7.807179112217

In [15]:
# Mean average precision of the third model (index 2, 'ConvNet2' in modelname).
np.mean(ap_res4802[2])

0.6191652886139849

In [16]:
# Reference ranking-loss values keyed by method name
# (presumably from previously published methods -- confirm the source).
rl_old3106 = {
    'IMMMLGP': 0.4190,
    'Hum-mPloc': 0.4906,
    'MKSVM': 0.1085,
    'FSVM-KNR': 0.1071,
}
# One-sample t-test of each model's ranking-loss values against every reference value.
for method, ref_loss in rl_old3106.items():
    print(method)
    for model, model_losses in zip(modelname, rl_res3106):
        t_stat, p_val = stats.ttest_1samp(model_losses, ref_loss)
        print(model, t_stat, p_val)

IMMMLGP
BLSTM -134.49051375906893 1.8332622133032055e-08
BLSTM ConvNet1 -348.54482446435406 4.0653024783524985e-10
ConvNet2 -174.63721359821358 6.449249061631241e-09
BLSTM ConvNet1 ConvNet2 -214.2308548905618 2.8481318089091628e-09
Hum-mPloc
BLSTM -164.368745723657 8.218018755641484e-09
BLSTM ConvNet1 -421.70086398271843 1.8972169945630583e-10
ConvNet2 -217.983422669676 2.6570299587146855e-09
BLSTM ConvNet1 ConvNet2 -258.9219367073347 1.3348506492411743e-09
MKSVM
BLSTM -4.920806705931999 0.007924815605760846
BLSTM ConvNet1 -31.296915659156554 6.21147811441155e-06
ConvNet2 13.337617640460845 0.0001826986919129142
BLSTM ConvNet1 ConvNet2 -20.42385902313194 3.393835604518811e-05
FSVM-KNR
BLSTM -4.336595466400947 0.012285783456783814
BLSTM ConvNet1 -29.86649030544552 7.484721077675733e-06
ConvNet2 14.185169214483853 0.00014340347868050884
BLSTM ConvNet1 ConvNet2 -19.550011054647552 4.0366924790104446e-05


In [17]:
# Reference ranking-loss values for the 4802 data, keyed by method name
# (presumably from previously published methods -- confirm the source).
rl_old4802 = {
    'IMMMLGP': 0.2436,
    'Hum-mPloc': 0.3145,
    'MKSVM': 0.0662,
    'FSVM-KNR': 0.0971,
    'mGOF-loc': 0.0606,
}
# One-sample t-test of each model's ranking-loss values against every reference value.
for method, ref_loss in rl_old4802.items():
    print(method)
    for model, model_losses in zip(modelname, rl_res4802):
        t_stat, p_val = stats.ttest_1samp(model_losses, ref_loss)
        print(model, t_stat, p_val)

IMMMLGP
BLSTM -62.21465247199658 3.997908884276463e-07
BLSTM ConvNet1 -156.24636780466648 1.0064517092610046e-08
ConvNet2 -431.9599673292149 1.7233033608428154e-10
BLSTM ConvNet1 ConvNet2 -195.5497578152358 4.102479410413782e-09
Hum-mPloc
BLSTM -89.27774768398079 9.436592162855248e-08
BLSTM ConvNet1 -216.4263624169768 2.7343166089696656e-09
ConvNet2 -606.6416022437231 4.4301111531208946e-11
BLSTM ConvNet1 ConvNet2 -272.63226815066076 1.0859336025451085e-09
MKSVM
BLSTM 5.500341753758003 0.005326933849381976
BLSTM ConvNet1 -5.669061116036759 0.0047751828053250985
ConvNet2 5.11368618042925 0.006916410735658959
BLSTM ConvNet1 ConvNet2 -2.6804019124941263 0.05520552268540775
FSVM-KNR
BLSTM -6.294434579814811 0.003255108482054693
BLSTM ConvNet1 -31.897013633954796 5.758518626631217e-06
ConvNet2 -71.01695583449751 2.3557499158543485e-07
BLSTM ConvNet1 ConvNet2 -36.27489513343392 3.447700187083014e-06
mGOF-loc
BLSTM 7.637906396735597 0.0015782830375087743
BLSTM ConvNet1 -0.9157752228218468 0.4

In [18]:
# Reference coverage values keyed by method name
# (presumably from previously published methods -- confirm the source).
cov_old3106 = {
    'IMMMLGP': 4.3030,
    'Hum-mPloc': 5.3170,
    'MKSVM': 1.7193,
    'FSVM-KNR': 1.7025,
}
# One-sample t-test of each model's coverage values against every reference value.
for method, ref_cov in cov_old3106.items():
    print(method)
    for model, model_cov in zip(modelname, cov_res3106):
        t_stat, p_val = stats.ttest_1samp(model_cov, ref_cov)
        print(model, t_stat, p_val)

IMMMLGP
BLSTM -73.16255743814959 2.0914863463461742e-07
BLSTM ConvNet1 -193.74021710295565 4.257895423542819e-09
ConvNet2 -99.14686636817224 6.204985974699275e-08
BLSTM ConvNet1 ConvNet2 -131.00500674454975 2.0362495112692194e-08
Hum-mPloc
BLSTM -100.50286014776448 5.876936255770969e-08
BLSTM ConvNet1 -259.40748002407145 1.3248851935367272e-09
ConvNet2 -143.30322450666395 1.4222843295989515e-08
BLSTM ConvNet1 ConvNet2 -175.01804260315228 6.39330533069821e-09
MKSVM
BLSTM -3.498711175001713 0.024925242042322347
BLSTM ConvNet1 -26.418217882751655 1.2201130670646846e-05
ConvNet2 13.364753476424408 0.00018124636968576828
BLSTM ConvNet1 ConvNet2 -18.858576026727896 4.656056728370076e-05
FSVM-KNR
BLSTM -3.0457357454932965 0.03818719124725636
BLSTM ConvNet1 -25.33023956216511 1.4424308934965506e-05
ConvNet2 14.09633810830475 0.00014699211509741262
BLSTM ConvNet1 ConvNet2 -18.129365965165245 5.443305722043779e-05


In [19]:
# Reference coverage values for the 4802 data, keyed by method name
# (presumably from previously published methods -- confirm the source).
cov_old4802 = {
    'IMMMLGP': 4.9772,
    'Hum-mPloc': 5.6830,
    'MKSVM': 2.9753,
    'FSVM-KNR': 2.6339,
    'mGOF-loc': 3.0227,
}
# One-sample t-test of each model's coverage values against every reference value.
for method, ref_cov in cov_old4802.items():
    print(method)
    for model, model_cov in zip(modelname, cov_res4802):
        t_stat, p_val = stats.ttest_1samp(model_cov, ref_cov)
        print(model, t_stat, p_val)

IMMMLGP
BLSTM -9.62123726250512 0.0006524992067716298
BLSTM ConvNet1 -41.2096199653553 2.072305815450267e-06
ConvNet2 -124.13323596088067 2.525870439473877e-08
BLSTM ConvNet1 ConvNet2 -45.769886016216745 1.362859573167806e-06
Hum-mPloc
BLSTM -15.599490255818587 9.860623713327593e-05
BLSTM ConvNet1 -55.116275905459716 6.487528825366529e-07
ConvNet2 -176.59409668559078 6.168132417751032e-09
BLSTM ConvNet1 ConvNet2 -62.556748195703044 3.9112457080113475e-07
MKSVM
BLSTM 7.335215935729831 0.0018387284845669922
BLSTM ConvNet1 -1.7653942264844777 0.15225540171484275
ConvNet2 24.664436304346278 1.6036959086980153e-05
BLSTM ConvNet1 ConvNet2 1.8436297065285487 0.13901211525854984
FSVM-KNR
BLSTM 10.226935363212426 0.0005152071311192792
BLSTM ConvNet1 4.961344705155708 0.007698736671975414
ConvNet2 50.04009208702695 9.54384703369041e-07
BLSTM ConvNet1 ConvNet2 9.963542922845662 0.0005700061217520458
mGOF-loc
BLSTM 6.9337294071338285 0.002271636933789867
BLSTM ConvNet1 -2.6993351326348756 0.054131

In [20]:
# Re-score the 3106 models on random row subsets of every result file.
#
# NOTE: this rebinds ap_res3106 / rl_res3106 / cov_res3106 from one value per
# result file (shape (4, 5)) to a list of resampled values per file (shape
# (4, 5, 20)). Earlier cells that run t-tests on the per-file values will
# behave differently if re-run after this cell.
fname = ['encoding_3106/new_encoding_3106_res_', 'encoding_3106_lrg/new_encoding_3106_lrg_',
         'pssm_conv_3106/new_pssm_3106_', 'encoding_pssm_3106/new_encoding_pssm_3106_']

N_RESAMPLES = 20       # number of random subsets drawn per result file
SUBSAMPLE_SIZE = 1000  # rows per subset, drawn without replacement

# Seeded local generator so that Restart & Run All reproduces the same subsets
# (the previous version used the unseeded global np.random state).
rng = np.random.RandomState(3106)

ap_res3106 = []
rl_res3106 = []
cov_res3106 = []
for prefix in fname:
    ap_tres = []
    rl_tres = []
    cov_tres = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y = result['test_y']
        pred_y = result['pred_y']
        ap_t = []
        rl_t = []
        cov_t = []
        for _ in range(N_RESAMPLES):
            # Row indices of one subset; raises ValueError if the file holds
            # fewer than SUBSAMPLE_SIZE rows.
            r = rng.choice(test_y.shape[0], SUBSAMPLE_SIZE, replace=False)
            sub_y, sub_pred = test_y[r, :], pred_y[r, :]
            ap_t.append(avgprec(sub_y, sub_pred))
            rl_t.append(label_ranking_loss(sub_y, sub_pred))
            # The coverage error is stored with 1 subtracted.
            cov_t.append(coverage_error(sub_y, sub_pred) - 1)
        ap_tres.append(ap_t)
        rl_tres.append(rl_t)
        cov_tres.append(cov_t)
    ap_res3106.append(ap_tres)
    rl_res3106.append(rl_tres)
    cov_res3106.append(cov_tres)

In [22]:
# Sanity check: dimensions of the resampled average-precision results.
np.shape(ap_res3106)

(4, 5, 20)

In [25]:
# Quick probe: row-wise one-sample t-test of the first model's resampled
# average precision against 0.6 (apparently an ad-hoc reference value).
stats.ttest_1samp(ap_res3106[0], popmean=0.6, axis=1)

Ttest_1sampResult(statistic=array([86.43553125, 57.33991361, 73.05073552, 53.51578821, 91.28761227]), pvalue=array([3.96165557e-26, 9.36882699e-23, 9.59793938e-25, 3.45041865e-22,
       1.40681495e-26]))

In [38]:
# Reference average-precision values keyed by method name.
ap_old3106 = {
    'IMMMLGP': 0.5810,
    'Hum-mPloc': 0.5790,
    'MKSVM': 0.7065,
    'FSVM-KNR': 0.7108,
}
# For every model and reference value: one t-test per row of the model's
# results (axis=1), then a single test over all of its values pooled (axis=None).
for model, model_scores in zip(modelname, ap_res3106):
    print(model)
    for method, ref_score in ap_old3106.items():
        print(method)
        row_t, row_p = stats.ttest_1samp(model_scores, ref_score, axis=1)
        print('t', ' '.join(map(str, row_t)))
        print('p', ' '.join(map(str, row_p)))
        pooled_t, pooled_p = stats.ttest_1samp(model_scores, ref_score, axis=None)
        print('total', pooled_t, pooled_p)

BLSTM
IMMMLGP
t 97.50749692897513 64.14803658288247 82.32898867165001 60.78866772594561 102.37200253598566
p 4.0312901909750064e-27 1.123118129846713e-23 9.965719642362772e-26 3.106195959884092e-23 1.601218415508867e-27
total 141.33783837658817 5.08344784366495e-116
Hum-mPloc
t 98.67296700034171 64.8646811067062 83.30564689768626 61.55423399031577 103.5387804590965
p 3.217990021681306e-27 9.102251910029368e-24 7.969905244457215e-26 2.4516301544156916e-23 1.2915013317016717e-27
total 143.00214594944356 1.6042078384719475e-116
MKSVM
t 24.37424995072143 19.178592712943345 21.043684987875526 12.74938463671789 29.156687860780114
p 8.503790635419019e-16 6.816590477082205e-14 1.2606944435002754e-14 9.252008394379847e-11 3.077001210870968e-17
total 36.90253817991306 1.13420447347658e-59
FSVM-KNR
t 21.868489297283272 17.637806986722335 18.94386980189761 11.103417168322052 26.648115326091816
p 6.242396355640298e-15 3.087403477200651e-13 8.518622571930448e-14 9.505209673254456e-10 1.6352732966237

In [39]:
# Pooled (axis=None) one-sample t-test per model and reference value;
# prints one "t p" pair per line, models outermost.
for model_scores in ap_res3106:
    for ref_score in ap_old3106.values():
        pooled_t, pooled_p = stats.ttest_1samp(model_scores, ref_score, axis=None)
        print(pooled_t, pooled_p)

141.33783837658817 5.08344784366495e-116
143.00214594944356 1.6042078384719475e-116
36.90253817991306 1.13420447347658e-59
33.324276898274015 1.3061894296547743e-55
259.6878942564501 4.217214094652253e-142
262.1832142942867 1.6385211174043525e-142
103.10656188220263 1.4939235478062541e-102
97.74162380085396 2.81616702025555e-100
53.20082143820715 1.2282068197076493e-74
54.898346599148105 6.062660947254343e-76
-53.31888241083789 9.934512276810387e-75
-56.968561506860915 1.735804829548989e-77
204.77353830607524 6.617062528769577e-132
206.72428992136517 2.5939238328500273e-132
82.36387444663104 5.25595821720234e-93
78.16975847375768 8.591052772352556e-91


In [46]:
# Dump every row of each model's ranking-loss results, unformatted.
for model, model_losses in zip(modelname, rl_res3106):
    print(model)
    for row in model_losses:
        print(' '.join(map(str, row)))

# Row-wise t-tests against each reference value. The t statistic is printed
# negated (presumably so a positive value means a lower loss than the reference).
for model_losses in rl_res3106:
    for ref_loss in rl_old3106.values():
        row_t, row_p = stats.ttest_1samp(model_losses, ref_loss, axis=1)
        print(' '.join([str(-val) for val in row_t]))
        print(' '.join(map(str, row_p)))

BLSTM
0.09119959207459208 0.0966252331002331 0.09788053613053614 0.0968451048951049 0.09314032634032635 0.09889609557109558 0.09704423076923076 0.10574038461538461 0.10061247086247087 0.10143822843822843 0.09438117715617717 0.09546695804195804 0.09220425407925409 0.08753409090909091 0.09361346153846155 0.09394428904428903 0.09818065268065268 0.09911241258741259 0.08506410256410257 0.095233682983683
0.10016713286713286 0.0977983682983683 0.0929969696969697 0.09913158508158508 0.08160460372960374 0.09063053613053615 0.08998106060606059 0.10652505827505827 0.09063519813519814 0.09414685314685314 0.09213461538461538 0.08600402097902098 0.08595034965034964 0.09678904428904428 0.09798986013986014 0.09476340326340327 0.09437966200466201 0.0943560606060606 0.09906410256410257 0.08379166666666665
0.0916853146853147 0.09925134032634032 0.10813193473193473 0.0961562937062937 0.10019743589743589 0.09621410256410258 0.09511975524475524 0.10435984848484849 0.0977640442890443 0.1034862470862471 0.107

In [43]:
# Pooled (axis=None) one-sample t-test per model and reference ranking loss.
# The t statistic is printed negated; the p-value is printed as returned.
for model_losses in rl_res3106:
    for ref_loss in rl_old3106.values():
        pooled_t, pooled_p = stats.ttest_1samp(model_losses, ref_loss, axis=None)
        print(-pooled_t, pooled_p)

405.7494030310484 2.8618136795308687e-161
495.96769380667433 6.737795293393896e-170
14.509468871385879 3.014723406314724e-26
12.745424079683698 1.3406826492205458e-22
822.4240828333124 1.2284759463448609e-191
994.7037949912714 8.187562347923325e-200
75.317230528197 3.2075198671576255e-89
71.94863280443803 2.744851201552263e-87
427.2685801525266 1.7220022261111836e-163
533.1114244489247 5.303639484789464e-173
-31.728670602104845 1.113529387008512e-53
-33.798223423542794 3.609107982636183e-56
691.5758691553252 3.4547711798741124e-184
835.6351661547803 2.536859177317463e-192
66.8494485082467 3.4347944990810724e-84
64.03264661160931 2.2151517054444967e-82


In [35]:
# Display the reference ranking-loss values for the 3106 data (defined in an earlier cell).
rl_old3106

{'IMMMLGP': 0.419, 'Hum-mPloc': 0.4906, 'MKSVM': 0.1085, 'FSVM-KNR': 0.1071}

In [50]:
fmt4 = '{:.4f}'.format

# Dump every row of each model's coverage results with four decimals.
for model, model_cov in zip(modelname, cov_res3106):
    print(model)
    for row in model_cov:
        print(' '.join(map(fmt4, row)))

# Row-wise t-tests against each reference value. The t statistic is printed
# negated (presumably so a positive value means lower coverage than the reference).
for model_cov in cov_res3106:
    for ref_cov in cov_old3106.values():
        row_t, row_p = stats.ttest_1samp(model_cov, ref_cov, axis=1)
        print(' '.join([str(-val) for val in row_t]))
        print(' '.join(map(str, row_p)))

BLSTM
1.4660 1.5570 1.6040 1.5660 1.5130 1.6000 1.5940 1.6750 1.6400 1.6390 1.5020 1.5390 1.4930 1.4540 1.5540 1.5340 1.5560 1.6290 1.4030 1.6100
1.6410 1.5970 1.5000 1.5820 1.3840 1.5090 1.4700 1.6810 1.4680 1.5640 1.5640 1.4840 1.4040 1.5860 1.5800 1.5160 1.5050 1.5410 1.6030 1.3960
1.4690 1.6330 1.7540 1.6300 1.6000 1.5780 1.5780 1.6860 1.5790 1.6650 1.7670 1.7010 1.6060 1.4600 1.4860 1.5600 1.7210 1.4820 1.6500 1.5060
1.7820 1.9090 1.7310 1.6350 1.7680 1.8370 1.6900 1.6810 1.7710 1.6880 1.9060 1.7330 1.8880 1.6400 1.7280 1.7460 1.7780 1.8200 1.6280 1.7980
1.5810 1.5190 1.4780 1.5660 1.4400 1.5320 1.5040 1.3550 1.5800 1.4010 1.5910 1.4830 1.4690 1.4710 1.4030 1.6090 1.5330 1.5040 1.4460 1.5150
BLSTM ConvNet1
1.4250 1.3280 1.3910 1.2360 1.3550 1.3240 1.2490 1.3460 1.3250 1.2760 1.3340 1.2940 1.4510 1.4010 1.3260 1.3310 1.2780 1.2480 1.3060 1.4180
1.4310 1.2950 1.3110 1.3440 1.2700 1.3050 1.4750 1.3380 1.3940 1.3130 1.3500 1.3140 1.3780 1.3260 1.3360 1.3880 1.3090 1.3710 1.3640 1.3500

In [51]:
# Pooled (axis=None) one-sample t-test per model and reference coverage value.
# The t statistic is printed negated; the p-value is printed as returned.
for model_cov in cov_res3106:
    for ref_cov in cov_old3106.values():
        pooled_t, pooled_p = stats.ttest_1samp(model_cov, ref_cov, axis=None)
        print(-pooled_t, pooled_p)

224.75818578363538 6.690512276690808e-136
308.747734440386 1.5635074170178923e-149
10.750496568204857 2.539413054276464e-18
9.358953750223177 2.7382631150680663e-15
469.9438716541475 1.395968456449475e-167
628.7616070053979 4.28078107873796e-180
65.27189647956615 3.468427500181982e-83
62.640596722267304 1.8529995868717813e-81
225.90521235456325 4.045930605336407e-136
326.2723355756679 6.648715642147545e-152
-29.832989091559078 2.799110776562892e-51
-31.49587634019278 2.163037127134316e-53
421.85608658744957 6.07943112597941e-163
563.1986866891123 2.3164556976065886e-175
61.71123857693109 7.846144002514828e-81
59.36946768767275 3.2705939990850006e-79


In [52]:
# Display the reference average-precision values for the 4802 data (defined in an earlier cell).
ap_old4802

{'IMMMLGP': 0.5725,
 'Hum-mPloc': 0.5644,
 'MKSVM': 0.6889,
 'FSVM-KNR': 0.6916,
 'mGOF-loc': 0.6482}

In [55]:
# Re-score the 4802 models on random row subsets of every result file.
#
# NOTE: this rebinds ap_res4802 / rl_res4802 / cov_res4802 from one value per
# result file (shape (4, 5)) to a list of resampled values per file. Earlier
# cells that run t-tests on the per-file values will behave differently if
# re-run after this cell.
fname = ['4802_result/result_4802_', '4802_result/result_large-reg-4802_',
         '4802_result/result_pssm-conv-4802_', 'encoding_pssm_4802/new_encoding_pssm_4802_']

N_RESAMPLES = 20       # number of random subsets drawn per result file
SUBSAMPLE_SIZE = 1000  # rows per subset, drawn without replacement

# Seeded local generator so that Restart & Run All reproduces the same subsets
# (the previous version used the unseeded global np.random state).
rng = np.random.RandomState(4802)

ap_res4802 = []
rl_res4802 = []
cov_res4802 = []
for prefix in fname:
    ap_tres = []
    rl_tres = []
    cov_tres = []
    for c in range(5):
        filename = prefix + str(c) + '_' + '9.mat'
        result = loadmat(filename)
        test_y = result['test_y']
        pred_y = result['pred_y']
        ap_t = []
        rl_t = []
        cov_t = []
        for _ in range(N_RESAMPLES):
            # Row indices of one subset; raises ValueError if the file holds
            # fewer than SUBSAMPLE_SIZE rows.
            r = rng.choice(test_y.shape[0], SUBSAMPLE_SIZE, replace=False)
            sub_y, sub_pred = test_y[r, :], pred_y[r, :]
            ap_t.append(avgprec(sub_y, sub_pred))
            rl_t.append(label_ranking_loss(sub_y, sub_pred))
            # The coverage error is stored with 1 subtracted.
            cov_t.append(coverage_error(sub_y, sub_pred) - 1)
        ap_tres.append(ap_t)
        rl_tres.append(rl_t)
        cov_tres.append(cov_t)
    ap_res4802.append(ap_tres)
    rl_res4802.append(rl_tres)
    cov_res4802.append(cov_tres)

In [58]:
# Display the reference average-precision values for the 4802 data again
# (same expression as an earlier cell).
ap_old4802

{'IMMMLGP': 0.5725,
 'Hum-mPloc': 0.5644,
 'MKSVM': 0.6889,
 'FSVM-KNR': 0.6916,
 'mGOF-loc': 0.6482}

In [59]:
fmt4 = '{:.4f}'.format

# Dump every row of each model's average-precision results with four decimals.
for model, model_scores in zip(modelname, ap_res4802):
    print(model)
    for row in model_scores:
        print(' '.join(map(fmt4, row)))

# Row-wise one-sample t-tests against each reference value (t printed as returned).
for model_scores in ap_res4802:
    for ref_score in ap_old4802.values():
        row_t, row_p = stats.ttest_1samp(model_scores, ref_score, axis=1)
        print(' '.join(map(str, row_t)))
        print(' '.join(map(str, row_p)))

BLSTM
0.6957 0.7075 0.7069 0.7022 0.7077 0.6996 0.6944 0.6986 0.6804 0.6959 0.7004 0.6735 0.7019 0.6851 0.7064 0.7175 0.7004 0.7201 0.6906 0.6874
0.7060 0.7039 0.7033 0.7078 0.6952 0.6938 0.7005 0.7025 0.7054 0.6997 0.7092 0.6843 0.7020 0.6966 0.6915 0.7030 0.6841 0.7083 0.6774 0.6803
0.7024 0.6894 0.6902 0.6654 0.6751 0.6901 0.6777 0.6881 0.6699 0.6920 0.6894 0.6859 0.6788 0.6770 0.6643 0.6890 0.6828 0.6871 0.6718 0.6842
0.6814 0.7151 0.6781 0.6891 0.6874 0.6819 0.6846 0.6535 0.6731 0.6932 0.6807 0.6784 0.6789 0.6867 0.6857 0.6872 0.6864 0.6756 0.6770 0.6912
0.6668 0.6727 0.6748 0.6764 0.6867 0.6888 0.6617 0.6726 0.6867 0.7046 0.6848 0.6805 0.6820 0.6882 0.6661 0.6884 0.6701 0.6687 0.7036 0.7043
BLSTM ConvNet1
0.7470 0.7637 0.7648 0.7548 0.7584 0.7606 0.7435 0.7513 0.7382 0.7568 0.7587 0.7546 0.7663 0.7673 0.7564 0.7517 0.7612 0.7537 0.7735 0.7497
0.7295 0.7465 0.7466 0.7663 0.7530 0.7425 0.7489 0.7724 0.7497 0.7444 0.7471 0.7478 0.7512 0.7470 0.7627 0.7385 0.7242 0.7332 0.7442 0.7321

In [62]:
# One output line per model: "t p" pairs of the pooled (axis=None) t-test,
# in the key order of ap_old4802.
for model_scores in ap_res4802:
    cells = []
    for ref_score in ap_old4802.values():
        pooled_t, pooled_p = stats.ttest_1samp(model_scores, ref_score, axis=None)
        cells.extend((pooled_t, pooled_p))
    print(' '.join(map(str, cells)))

86.97678841791493 2.5662281510630258e-95 93.03913763327085 3.523857818594751e-98 -0.14141512127387051 0.8878293746205668 -2.1621981930592056 0.03301218539697734 30.320018590452783 6.588573996732873e-52
151.8817380145168 4.2341525590814126e-119 158.78423983715263 5.292028468750761e-121 52.69023034108305 3.0896041752163344e-74 50.389396400204404 2.194119313321919e-72 87.37317159803108 1.645454460136085e-95
43.52927119706319 2.309890271603774e-66 50.923544858873484 8.027271334698195e-73 -62.729179943025485 1.616572042898825e-81 -65.19393783029562 3.893665711293299e-83 -25.57523697565773 2.0555357237755205e-45
171.8172859501642 2.2084615485049873e-124 180.00945640686143 2.227411560767502e-126 54.092762350218905 2.5000592777500607e-75 51.36203886465313 3.5419331120677386e-73 95.25589044745061 3.5131965222821165e-99


In [63]:
# Display the reference ranking-loss values for the 4802 data (defined in an earlier cell).
rl_old4802

{'IMMMLGP': 0.2436,
 'Hum-mPloc': 0.3145,
 'MKSVM': 0.0662,
 'FSVM-KNR': 0.0971,
 'mGOF-loc': 0.0606}

In [65]:
fmt4 = '{:.4f}'.format

# Dump every row of each model's ranking-loss results with four decimals.
for model, model_losses in zip(modelname, rl_res4802):
    print(model)
    for row in model_losses:
        print(' '.join(map(fmt4, row)))

# Row-wise t-tests against each reference value. The t statistic is printed
# negated (presumably so a positive value means a lower loss than the reference).
for model_losses in rl_res4802:
    for ref_loss in rl_old4802.values():
        row_t, row_p = stats.ttest_1samp(model_losses, ref_loss, axis=1)
        print(' '.join([str(-val) for val in row_t]))
        print(' '.join(map(str, row_p)))

BLSTM
0.0776 0.0693 0.0779 0.0748 0.0794 0.0744 0.0738 0.0783 0.0815 0.0745 0.0753 0.0826 0.0763 0.0764 0.0835 0.0741 0.0761 0.0733 0.0765 0.0822
0.0746 0.0816 0.0797 0.0695 0.0719 0.0762 0.0718 0.0757 0.0773 0.0755 0.0724 0.0842 0.0730 0.0772 0.0798 0.0768 0.0816 0.0724 0.0820 0.0890
0.0742 0.0761 0.0776 0.0832 0.0747 0.0783 0.0843 0.0767 0.0786 0.0715 0.0761 0.0822 0.0770 0.0849 0.0803 0.0725 0.0727 0.0788 0.0876 0.0808
0.0865 0.0741 0.0842 0.0793 0.0836 0.0799 0.0822 0.0939 0.0834 0.0818 0.0825 0.0879 0.0848 0.0879 0.0842 0.0779 0.0825 0.0836 0.0893 0.0828
0.1044 0.0933 0.0869 0.0905 0.0847 0.0890 0.0956 0.0922 0.0857 0.0776 0.0954 0.0892 0.0878 0.0862 0.0993 0.0876 0.0890 0.0961 0.0826 0.0785
BLSTM ConvNet1
0.0606 0.0501 0.0526 0.0508 0.0564 0.0522 0.0544 0.0489 0.0598 0.0583 0.0513 0.0573 0.0532 0.0553 0.0536 0.0578 0.0556 0.0592 0.0512 0.0594
0.0649 0.0608 0.0616 0.0538 0.0598 0.0590 0.0574 0.0543 0.0625 0.0606 0.0630 0.0575 0.0595 0.0580 0.0529 0.0644 0.0650 0.0657 0.0606 0.0673

In [67]:
# One output line per model: "-t p" pairs of the pooled (axis=None) t-test,
# in the key order of rl_old4802. Only the t statistic is negated.
for model_losses in rl_res4802:
    cells = []
    for ref_loss in rl_old4802.values():
        pooled_t, pooled_p = stats.ttest_1samp(model_losses, ref_loss, axis=None)
        cells.extend((-pooled_t, pooled_p))
    print(' '.join(map(str, cells)))

237.6623446132303 2.6891614615826614e-138 341.37096058628697 7.57639459649853e-154 -21.828607059834162 1.2743021285405641e-39 23.370211467210332 4.401505388645398e-42 -30.01997870227587 1.6026897196208895e-51
377.9269057242417 3.2273525675264238e-158 523.6878556688272 3.0981899627926715e-172 13.21615085584299 1.379071781625715e-23 76.74243228444233 5.171631516088818e-90 1.7032972636049397 0.0916498637627113
579.0293165815534 1.490490289294101e-176 812.7314942270312 3.9720632810142645e-191 -5.7205609122097005 1.131479983741947e-07 96.13271538179978 1.4319138548053039e-99 -24.179407101415276 2.489632516909136e-43
383.89578843994843 6.847834361460683e-159 535.2467975415358 3.570756994145199e-173 5.198059037669249 1.084100777431782e-06 71.16062858970099 8.000549653757436e-87 -6.756322499268523 9.842009135150243e-10


In [68]:
# Display the reference coverage values for the 4802 data (defined in an earlier cell).
cov_old4802

{'IMMMLGP': 4.9772,
 'Hum-mPloc': 5.683,
 'MKSVM': 2.9753,
 'FSVM-KNR': 2.6339,
 'mGOF-loc': 3.0227}

In [69]:
fmt4 = '{:.4f}'.format

# Dump every row of each model's coverage results with four decimals.
for model, model_cov in zip(modelname, cov_res4802):
    print(model)
    for row in model_cov:
        print(' '.join(map(fmt4, row)))

# Row-wise t-tests against each reference value. The t statistic is printed
# negated (presumably so a positive value means lower coverage than the reference).
for model_cov in cov_res4802:
    for ref_cov in cov_old4802.values():
        row_t, row_p = stats.ttest_1samp(model_cov, ref_cov, axis=1)
        print(' '.join([str(-val) for val in row_t]))
        print(' '.join(map(str, row_p)))

BLSTM
3.6460 3.3570 3.7290 3.6800 3.8550 3.6610 3.5660 3.6460 3.8170 3.5330 3.6310 3.9590 3.6200 3.6440 3.9240 3.6360 3.5790 3.5820 3.6010 3.9700
3.6030 3.9680 3.8140 3.3180 3.5380 3.6160 3.5020 3.6370 3.6890 3.7080 3.5690 4.0660 3.6250 3.6870 3.8860 3.7230 3.8530 3.4070 3.8910 4.3010
3.5720 3.6020 3.6620 3.8820 3.5350 3.7670 3.9820 3.6940 3.8220 3.3830 3.6620 3.8540 3.7440 3.8980 3.6220 3.3340 3.5190 3.7150 4.0100 3.8810
4.1100 3.6130 4.0130 3.8280 3.9890 3.6420 3.9650 4.3770 3.9770 3.8710 3.8830 4.0480 3.9300 4.1140 3.9870 3.6890 3.9170 4.0290 4.1440 3.9450
4.7210 4.4320 4.2760 4.2740 4.0560 4.3080 4.5370 4.3530 3.9800 3.7950 4.5150 4.1220 4.1850 3.8890 4.6610 4.1490 4.2760 4.5680 4.1110 3.6370
BLSTM ConvNet1
2.9870 2.5760 2.5650 2.6030 2.8860 2.5160 2.7550 2.4940 2.8910 2.9120 2.5080 2.7440 2.7360 2.6900 2.6110 2.7240 2.6720 2.8600 2.5420 2.9070
3.2420 2.9420 2.8920 2.6070 2.8800 2.7940 2.6750 2.6540 2.9730 2.9830 2.9840 2.7780 2.7890 2.8650 2.6290 3.0690 3.0600 3.1240 2.9400 3.0950

In [70]:
# One output line per model: "-t p" pairs of the pooled (axis=None) t-test,
# in the key order of cov_old4802. Only the t statistic is negated.
for model_cov in cov_res4802:
    cells = []
    for ref_cov in cov_old4802.values():
        pooled_t, pooled_p = stats.ttest_1samp(model_cov, ref_cov, axis=None)
        cells.extend((-pooled_t, pooled_p))
    print(' '.join(map(str, cells)))

37.2925171939245 4.2928307111423516e-60 60.87199047142181 2.940474801389181e-80 -29.587261148696445 5.849519253517191e-51 -40.99280404602936 6.463714952608822e-64 -28.003714771035103 7.622955368945212e-49
99.94478291591062 3.1681207617534104e-101 133.86216293122453 1.0729287098087255e-113 3.7430217191737047 0.0003052023548429008 -12.663033164969336 2.0007366358951736e-22 6.020839532613607 2.954284180821566e-08
120.95539070083026 2.309539206105137e-109 171.40648502910454 2.7968513407219297e-124 -22.14186877178556 3.942425221551105e-40 -46.54538762085445 4.1315053924685294e-69 -18.753682499243475 2.3559174841027517e-34
96.20950227213508 1.3241823445936208e-99 131.724309571089 5.23531854413681e-113 -4.523131238457006 1.6945643691501242e-05 -21.70187197501533 2.0548350089176483e-39 -2.1380336669489006 0.034975590127473664


In [76]:
fmt4 = '{:.4f}'.format

# For each metric and each model: print "mean std" for every row of results,
# followed by "mean std" over all of the model's values pooled together.
# np.std uses its default ddof=0 (population standard deviation).
for metric_results in (ap_res3106, rl_res3106, cov_res3106,
                       ap_res4802, rl_res4802, cov_res4802):
    for model_results in metric_results:
        for row in model_results:
            print(fmt4(np.average(row)), fmt4(np.std(row)))
        print(fmt4(np.average(model_results, axis=None)),
              fmt4(np.std(model_results, axis=None)))

0.7483 0.0075
0.7600 0.0122
0.7496 0.0089
0.7398 0.0114
0.7565 0.0075
0.7508 0.0120
0.7915 0.0055
0.7860 0.0077
0.7887 0.0075
0.7868 0.0096
0.7926 0.0068
0.7891 0.0080
0.6317 0.0090
0.6533 0.0060
0.6480 0.0105
0.6444 0.0091
0.6410 0.0108
0.6437 0.0117
0.7930 0.0096
0.7907 0.0086
0.7973 0.0105
0.7867 0.0086
0.7869 0.0095
0.7909 0.0102
0.0957 0.0046
0.0934 0.0060
0.0980 0.0060
0.1073 0.0058
0.0905 0.0047
0.0970 0.0079
0.0777 0.0035
0.0797 0.0032
0.0754 0.0036
0.0792 0.0043
0.0738 0.0025
0.0772 0.0041
0.1365 0.0049
0.1245 0.0040
0.1275 0.0066
0.1303 0.0051
0.1310 0.0060
0.1300 0.0067
0.0743 0.0042
0.0757 0.0041
0.0708 0.0048
0.0779 0.0039
0.0776 0.0040
0.0753 0.0049
1.5564 0.0684
1.5288 0.0783
1.6055 0.0915
1.7579 0.0832
1.4990 0.0669
1.5895 0.1201
1.3321 0.0594
1.3481 0.0475
1.2743 0.0520
1.3122 0.0606
1.2461 0.0312
1.3026 0.0635
2.1174 0.0766
1.9457 0.0658
1.9832 0.0958
2.0165 0.0831
2.0407 0.0863
2.0207 0.1005
1.2574 0.0599
1.2892 0.0634
1.2093 0.0640
1.3126 0.0520
1.3145 0.0572
1.2766