In [5]:
import numpy as np
import os
import pickle as pkl
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import log_loss

In [6]:
logs_folder = "logs"
svm_model_names = ["svm_rbf_1pc",
                   "svm_rbf_10pc",
                   "svm_wd_1pc",
                   "svm_wd_10pc",
                   "svm_wd_50pc",
                   "cascade_wd_1pc",
                   "cascade_wd_10pc",
                   "cascade_wd_50pc",
                   "cascade_wd_100pc"]
dl_model_names = ["cnn",
                  "lstm",
                  "lstm_cnn",
                  "bilstm",
                  "bilstm_cnn"]

In [7]:
for model_name in svm_model_names:
    y_pred = pkl.load(open(os.path.join(logs_folder, model_name,  model_name + "_y_proba_test.pkl"), "rb"))
    y_test = pkl.load(open(os.path.join(logs_folder, model_name, model_name + "_y_test.pkl"), "rb"))

    # Get the best threshold to maximize F1 score
    precision, recall, thresholds = precision_recall_curve(y_test, y_pred)
    f1_scores = 2 * (precision * recall) / (precision + recall)

    # Remove NaNs
    f1_scores = f1_scores[~np.isnan(f1_scores)]
    best_f1 = np.max(f1_scores)

    # Get the threshold that maximizes F1 score
    best_threshold = thresholds[np.argmax(f1_scores)]

    # Get AUC
    auc = roc_auc_score(y_test, y_pred)

    # Get Accuracy
    y_pred_binary = np.where(y_pred > best_threshold, 1, 0)
    acc = accuracy_score(y_test, y_pred_binary)

    # Get Binary Cross Entropy
    bce = log_loss(y_test, y_pred)

    print(f"Model: {model_name}")
    print(f"\tAUC: {auc}")
    print(f"\tBCE: {bce}")
    print(f"\tAccuracy: {acc}")
    print(f"\tBest F1 score: {best_f1}")
    print(f"\tBest threshold: {best_threshold}")
    print(f"")



Model: svm_rbf_1pc
	AUC: 0.7605720086816495
	BCE: 0.25375888792174306
	Accuracy: 0.8706705969049373
	Best F1 score: 0.3542435424354243
	Best threshold: 0.2007935477945658

Model: svm_rbf_10pc
	AUC: 0.805303548786637
	BCE: 0.2456143603291095
	Accuracy: 0.8850248664579112
	Best F1 score: 0.42541436464088395
	Best threshold: 0.21305174595278065

Model: svm_wd_1pc
	AUC: 0.763684099978996
	BCE: 0.24987093041015374
	Accuracy: 0.895725865880619
	Best F1 score: 0.37610619469026546
	Best threshold: 0.24552023941789383

Model: svm_wd_10pc
	AUC: 0.8145622477770108
	BCE: 0.24022914628788306
	Accuracy: 0.8974396758150672
	Best F1 score: 0.44060301507537686
	Best threshold: 0.23663770839222212

Model: svm_wd_50pc
	AUC: 0.8411283089030662
	BCE: 0.22527516907527714
	Accuracy: 0.9004759589172303
	Best F1 score: 0.47161913703399444
	Best threshold: 0.21851200895707074

Model: cascade_wd_1pc
	AUC: 0.7638433802422461
	BCE: 0.24989481460289692
	Accuracy: 0.8909358879882093
	Best F1 score: 0.381551362683438

  import sys
  import sys
  import sys


In [13]:
for model_name in dl_model_names:
    aucs = []
    bces = []
    accs = []
    f1s = []
    for fold in range(1, 6):
        y_pred = pkl.load(open(os.path.join(logs_folder, model_name,  model_name + "_" + str(fold) + "_y_pred.pkl"), "rb"))
        y_test = pkl.load(open(os.path.join(logs_folder, model_name, model_name + "_" + str(fold) + "_y_test.pkl"), "rb"))

        # Get the best threshold to maximize F1 score
        precision, recall, thresholds = precision_recall_curve(y_test, y_pred)
        f1_scores = 2 * (precision * recall) / (precision + recall)

        # Remove NaNs
        f1_scores = f1_scores[~np.isnan(f1_scores)]
        best_f1 = np.max(f1_scores)

        # Get the threshold that maximizes F1 score
        best_threshold = thresholds[np.argmax(f1_scores)]

        # Get AUC
        auc = roc_auc_score(y_test, y_pred)

        # Get Accuracy
        y_pred_binary = np.where(y_pred > best_threshold, 1, 0)
        acc = accuracy_score(y_test, y_pred_binary)

        # Get Binary Cross Entropy
        bce = log_loss(y_test, y_pred)

        aucs.append(auc)
        bces.append(bce)
        accs.append(acc)
        f1s.append(best_f1)

    print(f"Model: {model_name}")
    print(f"\tAUCs: {aucs}")
    print(f"\tMean AUC: {np.mean(aucs)}")
    print(f"\tBCEs: {bces}")
    print(f"\tMean BCE: {np.mean(bces)}")
    print(f"\tAccuracies: {accs}")
    print(f"\tMean Accuracy: {np.mean(accs)}")
    print(f"\tBest F1 scores: {f1s}")
    print(f"\tMean F1 score: {np.mean(f1s)}")
    print(f"")



Model: cnn
	AUCs: [0.8646898420352422, 0.8624803123643846, 0.8667930850234149, 0.869498658408072, 0.8558056391713512]
	Mean AUC: 0.8638535074004932
	BCEs: [0.2191533011711589, 0.2202144586958365, 0.21706956373718111, 0.2168281029006575, 0.22041746684123603]
	Mean BCE: 0.218736578669214
	Accuracies: [0.8979193374887642, 0.8968804797901655, 0.9052687031224673, 0.9013932481617376, 0.9026163004877473]
	Mean Accuracy: 0.9008156138101764
	Best F1 scores: [0.49609921984396876, 0.48813137778428, 0.5004080046629105, 0.4996635262449529, 0.49994324845824983]
	Mean F1 score: 0.4968490753988724

Model: lstm
	AUCs: [0.9161555083599178, 0.9161426486840718, 0.9170567239579034, 0.9101864796749368, 0.916407092466247]
	Mean AUC: 0.9151896906286152
	BCEs: [0.17597896551588443, 0.1758352353718125, 0.17538831009743364, 0.18241176179152788, 0.17511661683772237]
	Mean BCE: 0.17694617792287617
	Accuracies: [0.9281567275245716, 0.92627425843243, 0.9318443039653419, 0.9278288610877798, 0.9283261865817898]
	Mean 

  if sys.path[0] == "":
  loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)


Model: lstm_cnn
	AUCs: [0.9131604292306349, 0.9131270810052668, 0.9126970942728334, 0.9135971209405636, 0.9111315456064472]
	Mean AUC: 0.9127426542111492
	BCEs: [0.18002166019584465, 0.17994519395892078, 0.18010478973328256, 0.17977482876807654, inf]
	Mean BCE: inf
	Accuracies: [0.9246607134963087, 0.9242002269277809, 0.9230913752707661, 0.9266500154723487, 0.925692203409811]
	Mean Accuracy: 0.9248589069154031
	Best F1 scores: [0.5920280892151777, 0.5924517087667163, 0.5895399134880064, 0.5926092650188246, 0.5903404013323585]
	Mean F1 score: 0.5913938755642167

Model: bilstm
	AUCs: [0.9007836049642195, 0.9244689729053319, 0.9219598174267964, 0.9213560963992251, 0.9174661776275277]
	Mean AUC: 0.9172069338646202
	BCEs: [0.1874940245779584, 0.16792981718234692, 0.17103475237913424, 0.1702295514301942, 0.17497794985892665]
	Mean BCE: 0.17433321908571206
	Accuracies: [0.9220709370349086, 0.9339441227178286, 0.9310854221004081, 0.9339662260731179, 0.9303154885578298]
	Mean Accuracy: 0.930276

  if sys.path[0] == "":
  loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)


Model: bilstm_cnn
	AUCs: [0.9122542562107955, 0.9133413831881478, 0.9121173511782544, 0.9108150719287971, 0.9086462094004933]
	Mean AUC: 0.9114348543812977
	BCEs: [0.18025497072629545, 0.17918581666582434, 0.18091052692891366, inf, 0.18310311355128436]
	Mean BCE: inf
	Accuracies: [0.9237986826400247, 0.9250512061064203, 0.9255669510631714, 0.9199011243240057, 0.9207778907504826]
	Mean Accuracy: 0.923019170976821
	Best F1 scores: [0.5906220929081805, 0.5920757138273981, 0.5891993005571143, 0.585487683977732, 0.5829163272431049]
	Mean F1 score: 0.5880602237027059

