#### prediction performance of Enformer

In [1]:
import numpy as np
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import precision_score, accuracy_score,recall_score, f1_score
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import roc_curve, auc
from prettytable import PrettyTable
import pandas as pd

In [2]:
result_path = '../../datasets/tissue_specific/expecto_results/'
origin_path = '../../datasets/tissue_specific/'

compare_tissue_list = ['Adipose_Subcutaneous','Artery_Tibial','Breast_Mammary_Tissue','Colon_Transverse','Nerve_Tibial','Thyroid']
compare_tissue_list2 = ['Adipose Subcutaneous','Artery Tibial','Breast Mammary Tissue','Colon Transverse','Nerve Tibial','Thyroid | GTEx']
model_size_list = ['small','middle']

In [3]:
for i in range(6):
    tissue_origin = compare_tissue_list[i]
    tissue_pred = compare_tissue_list2[i]
    for model_size in model_size_list:
        origin_data = pd.read_pickle(origin_path + model_size + '/test_' + model_size + '_' + tissue_origin + '.pkl')[['phenotype_id','variant_id','tss_distance','label','bulk']]
        pred_data = pd.read_csv(result_path + 'test_' + model_size + '_' + tissue_origin + '.csv')
        data_len = len(origin_data)
        label = np.array(origin_data['label'])
        y_score_middle = pred_data[tissue_pred][:data_len]
        y_score = np.where(y_score_middle > 0, 1, 0)
        y_score_pro = np.array([(0, 1) if x > 0 else (1, 0) for x in y_score_middle])
        y_one_hot = to_categorical(label)
        y_score_one_hot = to_categorical(y_score)

        acc = np.round(accuracy_score(label, y_score),3)
        precision = np.round(precision_score(label, y_score),3)
        recall = np.round(recall_score(label, y_score),3)
        f1 = np.round(f1_score(label, y_score),3)
        fpr, tpr, thresholds = roc_curve(y_one_hot.ravel(),y_score_pro.ravel()) 
        auc_ = np.round(auc(fpr, tpr),3)

        print('tissue: ', tissue_origin)
        print('model size: ', model_size)
        table = PrettyTable(['ACC','Precision','Recall','F1-score','AUC'])
        table.add_row([acc,precision,recall,f1,auc_])
        print(table)
        

tissue:  Adipose_Subcutaneous
model size:  small
+------+-----------+--------+----------+------+
| ACC  | Precision | Recall | F1-score | AUC  |
+------+-----------+--------+----------+------+
| 0.41 |    0.48   | 0.545  |  0.511   | 0.41 |
+------+-----------+--------+----------+------+
tissue:  Adipose_Subcutaneous
model size:  middle
+-------+-----------+--------+----------+-------+
|  ACC  | Precision | Recall | F1-score |  AUC  |
+-------+-----------+--------+----------+-------+
| 0.508 |    0.6    | 0.353  |  0.444   | 0.508 |
+-------+-----------+--------+----------+-------+
tissue:  Artery_Tibial
model size:  small
+-------+-----------+--------+----------+-------+
|  ACC  | Precision | Recall | F1-score |  AUC  |
+-------+-----------+--------+----------+-------+
| 0.444 |   0.481   | 0.542  |   0.51   | 0.444 |
+-------+-----------+--------+----------+-------+
tissue:  Artery_Tibial
model size:  middle
+-------+-----------+--------+----------+-------+
|  ACC  | Precision | Reca