In [19]:
import pandas as pd
import numpy as np

from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, accuracy_score, roc_curve,\
                            confusion_matrix

In [38]:
logit_scores = pd.read_csv("../output/scores/y_scores_test_logit.csv", sep = "^")
rf_scores = pd.read_csv("../output/scores/y_scores_test_rf.csv", sep = "^")
xg_scores = pd.read_csv("../output/scores/y_scores_test_xg.csv", sep = "^")
nn_scores = pd.read_csv("../output/scores/y_scores_test_encoded_logit.csv", sep = "^")

loan_status = logit_scores["loan_status"]

In [43]:
result = []
for threshold in np.arange(0.0, 1.0, 0.05):
    # predictions
    logit_predictions = logit_scores["scores"] > threshold
    rf_predictions = rf_scores["scores"] > threshold
    xg_predictions = xg_scores["scores"] > threshold
    nn_predictions = nn_scores["scores"] > threshold
    
    # metrics
    # logit
    logit_accuracy = accuracy_score(loan_status, logit_predictions)
    logit_precision = precision_recall_fscore_support(loan_status, logit_predictions)[1][1]
    logit_recall = precision_recall_fscore_support(loan_status, logit_predictions)[0][1]
    tn, fp, fn, tp = confusion_matrix(loan_status, logit_predictions).ravel()
    specificity = tn / (tn+fp)
    logit_fpr = 1 - specificity
    
    # rf
    rf_accuracy = accuracy_score(loan_status, rf_predictions)
    rf_precision = precision_recall_fscore_support(loan_status, rf_predictions)[1][1]
    rf_recall = precision_recall_fscore_support(loan_status, rf_predictions)[0][1]
    tn, fp, fn, tp = confusion_matrix(loan_status, rf_predictions).ravel()
    specificity = tn / (tn+fp)
    rf_fpr = 1 - specificity
    
    # xg
    xg_accuracy = accuracy_score(loan_status, xg_predictions)
    xg_precision = precision_recall_fscore_support(loan_status, xg_predictions)[1][1]
    xg_recall = precision_recall_fscore_support(loan_status, xg_predictions)[0][1]
    tn, fp, fn, tp = confusion_matrix(loan_status, xg_predictions).ravel()
    specificity = tn / (tn+fp)
    xg_fpr = 1 - specificity
    
    # nn
    nn_accuracy = accuracy_score(loan_status, nn_predictions)
    nn_precision = precision_recall_fscore_support(loan_status, nn_predictions)[1][1]
    nn_recall = precision_recall_fscore_support(loan_status, nn_predictions)[0][1]
    tn, fp, fn, tp = confusion_matrix(loan_status, nn_predictions).ravel()
    specificity = tn / (tn+fp)
    nn_fpr = 1 - specificity
    
    partial_result = [threshold, 
                      logit_accuracy, logit_precision, logit_recall, logit_fpr,
                      rf_accuracy, rf_precision, rf_recall, rf_fpr,
                      xg_accuracy, xg_precision, xg_recall, xg_fpr,
                      nn_accuracy, nn_precision, nn_recall, nn_fpr]
    
    result.append(partial_result)

  'precision', 'predicted', average, warn_for)


In [44]:
report = pd.DataFrame(result)

In [45]:
report.columns = ["threshold",
                  "logit_accuracy", "logit_precision", "logit_recall", "logit_fpr",
                  "rf_accuracy", "rf_precision", "rf_recall", "rf_fpr",
                  "xg_accuracy", "xg_precision", "xg_recall", "xg_fpr",
                  "nn_accuracy", "nn_precision", "nn_recall", "nn_fpr"]

In [47]:
report

Unnamed: 0,threshold,logit_accuracy,logit_precision,logit_recall,logit_fpr,rf_accuracy,rf_precision,rf_recall,rf_fpr,xg_accuracy,xg_precision,xg_recall,xg_fpr,nn_accuracy,nn_precision,nn_recall,nn_fpr
0,0.0,0.21073,1.0,0.21073,1.0,0.21073,1.0,0.21073,1.0,0.21073,1.0,0.21073,1.0,0.21073,1.0,0.21073,1.0
1,0.05,0.314529,0.991938,0.234129,0.866334,0.323433,0.996052,0.237003,0.856151,0.366845,0.989094,0.248343,0.799291,0.343553,0.987187,0.241396,0.828293
2,0.1,0.482658,0.92697,0.280141,0.635971,0.424611,0.961863,0.263222,0.718832,0.492598,0.935836,0.285359,0.625744,0.49656,0.922421,0.285237,0.617142
3,0.15,0.608756,0.82119,0.328608,0.447963,0.546049,0.882109,0.302257,0.543677,0.602573,0.849893,0.328684,0.46346,0.609031,0.829787,0.32995,0.44991
4,0.2,0.6903,0.70748,0.375397,0.314288,0.643638,0.775391,0.345868,0.39154,0.681784,0.750167,0.373143,0.336474,0.68473,0.726783,0.372776,0.326497
5,0.25,0.742263,0.599291,0.421546,0.219564,0.718675,0.640339,0.396327,0.26041,0.735467,0.64365,0.417245,0.240019,0.736517,0.62137,0.416168,0.232739
6,0.3,0.773317,0.499799,0.464798,0.153656,0.76712,0.498762,0.452336,0.16123,0.771054,0.544661,0.463239,0.168501,0.7697,0.520976,0.459083,0.163892
7,0.35,0.789898,0.408671,0.501828,0.108317,0.791329,0.37592,0.506582,0.09776,0.792393,0.448648,0.508397,0.115829,0.788798,0.428041,0.498694,0.114882
8,0.4,0.800317,0.334203,0.542551,0.075234,0.801523,0.277098,0.558605,0.058459,0.803257,0.362237,0.550427,0.078994,0.800063,0.346481,0.539905,0.078833
9,0.45,0.805421,0.270005,0.582702,0.051627,0.804709,0.18898,0.620224,0.030896,0.807846,0.285729,0.591195,0.052752,0.805901,0.277064,0.583034,0.052904


In [48]:
report.to_csv("../output/models_report.csv", sep = ",", index = False)