In [1]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, accuracy_score,recall_score, f1_score
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import roc_curve, auc
from prettytable import PrettyTable
from tensorflow.keras.utils import to_categorical

In [2]:
# finetune
# Ten-fold evaluation: for each split, fit a decision tree on the saved
# 'small' model outputs and score it on the matching held-out split.
file_path = 'middle_output_tenfold_cpgenie/'
model_size = 'small'

acc_list = []
precision_list = []
recall_list = []
f1_list = []
auc_list = []
for f in range(10):
    fold = str(f+1)
    # Common file-name prefix shared by the four arrays of this split.
    split_prefix = file_path + model_size + '_split' + fold

    # training model
    # Label files are 2-D (one column per class); argmax turns each row into a class index.
    X_train = np.load(split_prefix + '_output.npy')
    train_label_matrix = np.load(split_prefix + '_label.npy').astype("int")
    Y_train = train_label_matrix.argmax(axis=1)

    # Fixed seed: ties between equally good splits are otherwise broken randomly,
    # so the metrics would change between re-runs of this cell.
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X_train, Y_train)

    # prediction output
    X_test = np.load(split_prefix + '_output_test.npy')
    test_label_matrix = np.load(split_prefix + '_label_test.npy').astype("int")
    label = test_label_matrix.argmax(axis=1)

    # Take the class count from the label files rather than from the values that
    # happen to occur in one fold, so both arrays passed to roc_curve always have
    # the same number of columns.
    n_classes = max(train_label_matrix.shape[1], test_label_matrix.shape[1])

    y_score = clf.predict(X_test)
    # predict_proba only returns columns for the classes seen during fit
    # (clf.classes_); scatter them into a full-width matrix, leaving 0 elsewhere.
    y_score_pro = np.zeros((X_test.shape[0], n_classes))
    y_score_pro[:, clf.classes_] = clf.predict_proba(X_test)

    y_one_hot = to_categorical(label, num_classes=n_classes)

    acc = accuracy_score(label, y_score)
    # zero_division=0 yields the same 0.0 as the default when a fold has no
    # positive predictions/labels, but without the UndefinedMetricWarning.
    precision = precision_score(label, y_score, zero_division=0)
    recall = recall_score(label, y_score, zero_division=0)
    f1 = f1_score(label, y_score, zero_division=0)
    # Micro-averaged ROC: every (sample, class) cell is treated as one binary decision.
    fpr, tpr, thresholds = roc_curve(y_one_hot.ravel(), y_score_pro.ravel())
    auc_ = auc(fpr, tpr)

    table = PrettyTable(['ACC','Precision','Recall','F1-score','AUC'])
    table.add_row([acc,precision,recall,f1,auc_])
    print(table)

    acc_list.append(acc)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    auc_list.append(auc_)

# Summary across the ten folds (np.std is the population standard deviation, ddof=0).
print('(mean) ACC: ', np.mean(acc_list), 'Precision: ', np.mean(precision_list), 'Recall: ', np.mean(recall_list), 'F1: ', np.mean(f1_list), 'AUC: ', np.mean(auc_list))
print('(std) ACC: ', np.std(acc_list), 'Precision: ', np.std(precision_list), 'Recall: ', np.std(recall_list), 'F1: ', np.std(f1_list), 'AUC: ', np.std(auc_list))

+---------------------+-----------+--------+----------+---------------------+
|         ACC         | Precision | Recall | F1-score |         AUC         |
+---------------------+-----------+--------+----------+---------------------+
| 0.48742138364779874 |    0.0    |  0.0   |   0.0    | 0.48742138364779874 |
+---------------------+-----------+--------+----------+---------------------+
+--------------------+--------------------+--------------------+--------------------+--------------------+
|        ACC         |     Precision      |       Recall       |      F1-score      |        AUC         |
+--------------------+--------------------+--------------------+--------------------+--------------------+
| 0.9106918238993711 | 0.9191542288557214 | 0.9056372549019608 | 0.9123456790123456 | 0.9462910881689807 |
+--------------------+--------------------+--------------------+--------------------+--------------------+
+--------------------+--------------------+--------+--------------------+--

  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
# finetune
# Ten-fold evaluation: for each split, fit a decision tree on the saved
# 'large' model outputs and score it on the matching held-out split.
file_path = 'middle_output_tenfold_cpgenie/'
model_size = 'large'

acc_list = []
precision_list = []
recall_list = []
f1_list = []
auc_list = []
for f in range(10):
    fold = str(f+1)
    # Common file-name prefix shared by the four arrays of this split.
    split_prefix = file_path + model_size + '_split' + fold

    # training model
    # Label files are 2-D (one column per class); argmax turns each row into a class index.
    X_train = np.load(split_prefix + '_output.npy')
    train_label_matrix = np.load(split_prefix + '_label.npy').astype("int")
    Y_train = train_label_matrix.argmax(axis=1)

    # Fixed seed: ties between equally good splits are otherwise broken randomly,
    # so the metrics would change between re-runs of this cell.
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(X_train, Y_train)

    # prediction output
    X_test = np.load(split_prefix + '_output_test.npy')
    test_label_matrix = np.load(split_prefix + '_label_test.npy').astype("int")
    label = test_label_matrix.argmax(axis=1)

    # Take the class count from the label files rather than from the values that
    # happen to occur in one fold, so both arrays passed to roc_curve always have
    # the same number of columns.
    n_classes = max(train_label_matrix.shape[1], test_label_matrix.shape[1])

    y_score = clf.predict(X_test)
    # predict_proba only returns columns for the classes seen during fit
    # (clf.classes_); scatter them into a full-width matrix, leaving 0 elsewhere.
    y_score_pro = np.zeros((X_test.shape[0], n_classes))
    y_score_pro[:, clf.classes_] = clf.predict_proba(X_test)

    y_one_hot = to_categorical(label, num_classes=n_classes)

    acc = accuracy_score(label, y_score)
    # zero_division=0 yields the same 0.0 as the default when a fold has no
    # positive predictions/labels, but without the UndefinedMetricWarning.
    precision = precision_score(label, y_score, zero_division=0)
    recall = recall_score(label, y_score, zero_division=0)
    f1 = f1_score(label, y_score, zero_division=0)
    # Micro-averaged ROC: every (sample, class) cell is treated as one binary decision.
    fpr, tpr, thresholds = roc_curve(y_one_hot.ravel(), y_score_pro.ravel())
    auc_ = auc(fpr, tpr)

    table = PrettyTable(['ACC','Precision','Recall','F1-score','AUC'])
    table.add_row([acc,precision,recall,f1,auc_])
    print(table)

    acc_list.append(acc)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    auc_list.append(auc_)

# Summary across the ten folds (np.std is the population standard deviation, ddof=0).
print('(mean) ACC: ', np.mean(acc_list), 'Precision: ', np.mean(precision_list), 'Recall: ', np.mean(recall_list), 'F1: ', np.mean(f1_list), 'AUC: ', np.mean(auc_list))
print('(std) ACC: ', np.std(acc_list), 'Precision: ', np.std(precision_list), 'Recall: ', np.std(recall_list), 'F1: ', np.std(f1_list), 'AUC: ', np.std(auc_list))

+-------------------+--------------------+--------------------+--------------------+--------------------+
|        ACC        |     Precision      |       Recall       |      F1-score      |        AUC         |
+-------------------+--------------------+--------------------+--------------------+--------------------+
| 0.841714756801319 | 0.8416801292407108 | 0.8471544715447155 | 0.8444084278768232 | 0.8981451967521403 |
+-------------------+--------------------+--------------------+--------------------+--------------------+
+--------------------+-----------+--------+----------+--------------------+
|        ACC         | Precision | Recall | F1-score |        AUC         |
+--------------------+-----------+--------+----------+--------------------+
| 0.4830997526793075 |    0.0    |  0.0   |   0.0    | 0.4830997526793075 |
+--------------------+-----------+--------+----------+--------------------+
+--------------------+--------------------+--------------------+--------------------+-----

  _warn_prf(average, modifier, msg_start, len(result))
