In [1]:
from collections import Counter

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from imblearn.over_sampling import SMOTE,ADASYN
#import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
%matplotlib inline


  import pandas.util.testing as tm


In [2]:
# Use the "Times New Roman" font family for text in figures created from here on.
plt.rcParams.update({"font.family": "Times New Roman"})

In [3]:
def precision_recall_thershold(pred_proba, y_test):
    """Sweep the decision boundary and collect per-class precision and recall.

    For every threshold ``T`` in ``np.arange(0, 1, 0.01)`` a sample is
    labelled ``0`` when ``pred_proba[:, 0] > T`` and ``1`` otherwise. The
    resulting labels are scored against ``y_test`` with
    ``precision_recall_fscore_support``.

    Parameters
    ----------
    pred_proba : 2-D array
        Indexed as ``pred_proba[:, 0]``. Presumably a ``predict_proba``
        result with the class-0 probability in column 0 -- TODO confirm
        against the callers.
    y_test : array-like
        True labels. They must be encoded as 0/1 to be comparable with the
        0/1 labels generated here.

    Returns
    -------
    tuple of four lists
        ``(precision_class0, precision_class1, recall_class0, recall_class1)``
        with one entry per threshold, in threshold order.
    """
    # The score column does not depend on the threshold, so slice it once.
    class0_scores = pred_proba[:, 0]

    t_recall_nodiab, t_recall_diab = [], []
    t_precision_nodiab, t_precision_diab = [], []

    for thresh in np.arange(0, 1, 0.01):
        y_pred = np.where(class0_scores > thresh, 0, 1)
        # Pin the label set: without it the metric arrays only cover the
        # labels that actually occur, and the two-way unpacking below raises
        # when y_test and y_pred together contain a single class.
        precision, recall, _fscore, _support = \
                precision_recall_fscore_support(
                        y_test,
                        y_pred,
                        labels=[0, 1])
        recall_nodiab, recall_diab = recall
        precision_nodiab, precision_diab = precision

        t_recall_nodiab.append(recall_nodiab)
        t_recall_diab.append(recall_diab)

        t_precision_nodiab.append(precision_nodiab)
        t_precision_diab.append(precision_diab)

    return t_precision_nodiab, t_precision_diab, \
            t_recall_nodiab, t_recall_diab


In [4]:
def roc_interp(fpr_tpr, n_points=100):
    """Resample a collection of curves onto one evenly spaced grid on [0, 1].

    Parameters
    ----------
    fpr_tpr : iterable of (x, y) pairs
        Each pair holds the x and y coordinates of one curve. Judging by the
        name these are (FPR, TPR) arrays from ROC computations -- TODO
        confirm against the callers. ``np.interp`` expects ``x`` to be
        increasing. Any iterable is accepted, including generators.
    n_points : int, optional
        Number of grid points between 0 and 1 (inclusive). Defaults to 100.

    Returns
    -------
    list of numpy.ndarray
        One array of length ``n_points`` per input curve, holding the
        linearly interpolated y values at ``np.linspace(0, 1, n_points)``.
    """
    grid = np.linspace(0, 1, n_points)
    return [np.interp(grid, x, y) for x, y in fpr_tpr]




In [5]:
def plot_recall_vs_decision_boundary(
        t_recall_diab,
        t_recall_nodiab,
        filename):
    """Plot two recall curves against the decision boundary and save the figure.

    Parameters
    ----------
    t_recall_diab : sequence
        Recall values drawn with the legend label 'Covid-yes'; one value per
        threshold in ``np.arange(0, 1, 0.01)``.
    t_recall_nodiab : sequence
        Recall values drawn with the legend label 'Covid-no'; same length.
    filename : str or path-like
        Destination passed to ``plt.savefig`` (written at 600 dpi).

    Dashed vertical reference lines are drawn at T = 0.5 and T = 0.77.
    NOTE(review): the significance of 0.77 is not visible here -- confirm.
    """
    text_size = 12

    plt.figure(figsize=(10,7))
    thresholds = np.arange(0, 1, 0.01)

    # Recall curves, in the same draw order as the legend entries.
    curves = ((t_recall_diab, 'Covid-yes'), (t_recall_nodiab, 'Covid-no'))
    for recalls, legend_label in curves:
        plt.plot(thresholds, recalls, label=legend_label)

    # Dashed vertical markers for the two reference boundaries.
    for boundary in (.5, .77):
        plt.plot([boundary, boundary], [0, 1], 'k--')

    plt.ylim([0.0, 1.01])
    plt.xlim([0.0, 1.0])
    plt.legend(loc='upper left', fontsize=8)
    plt.title('Recall vs. Decision Boundary\n', fontsize=text_size)
    plt.xlabel('Decision Boundary (T)', fontsize=text_size)
    plt.ylabel('Recall Rate', fontsize=text_size)
    plt.tick_params(axis='both', which='major', labelsize=text_size)

    plt.savefig(filename, dpi=600)
    plt.show()




In [6]:
def plot_multi_recall_vs_decision_boundary(
        probas,
        y_test,
        filename):
    """Compare recall-vs-threshold curves of several models in two panels.

    The left panel (a) shows the recall titled 'COVID-yes Class', the right
    panel (b) the recall titled 'COVID-no Class'. Each entry of ``probas``
    contributes one curve per panel, computed by
    ``precision_recall_thershold``.

    Parameters
    ----------
    probas : mapping
        Iterated for its keys, which become the legend labels. Each value
        ``probas[key]`` is handed to ``precision_recall_thershold`` and so
        must be a 2-D array indexable as ``[:, 0]``.
    y_test : array-like
        True labels forwarded to ``precision_recall_thershold``.
    filename : str or path-like
        Destination passed to ``plt.savefig`` (written at 600 dpi).
    """
    thresholds = np.arange(0, 1, 0.01)
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(13,6))

    # Static decoration is identical for both panels except for the panel
    # tag and title. It is set once here rather than on every loop
    # iteration, so the titles also appear when `probas` is empty.
    panels = (
        (ax1,
         'Decision Boundary (T)\n'
         '(a)\n',
         'COVID-yes Class\n'
         'Recall vs. Decision Boundary'),
        (ax2,
         'Decision Boundary (T)\n'
         '(b)\n',
         'COVID-no Class\n'
         'Recall vs. Decision Boundary'),
    )
    for ax, xlabel, title in panels:
        ax.plot([.5, .5], [0, 1], 'k--')  # dashed marker at T = 0.5
        ax.set_ylim([-0.01, 1.009])
        ax.set_xlim([0.0, 1])
        ax.set_xlabel(xlabel, fontsize=12)
        ax.set_ylabel('Recall Rate', fontsize=12)
        ax.tick_params(axis='both', which='major', labelsize=12)
        ax.set_title(title, fontsize=12)

    for p in probas:
        # Only the recall lists are plotted; the precision lists are unused.
        _, _, t_recall_nodiab, t_recall_diab = \
                precision_recall_thershold(probas[p], y_test)
        ax1.plot(thresholds, t_recall_diab,   label=p)
        ax2.plot(thresholds, t_recall_nodiab, label=p)

    ax1.legend(loc='lower right')
    ax2.legend(loc='lower left')
    plt.savefig(filename,dpi=600)
    plt.show()





In [7]:
def plot_roc_curves(df_preds, y_test, filename):
    """Draw one ROC curve per column of ``df_preds`` and print each AUC.

    Parameters
    ----------
    df_preds : pandas.DataFrame
        One column of scores per model; the column name is used as the
        legend label and in the printed AUC report.
    y_test : array-like
        True labels passed to ``roc_curve``.
    filename : str or path-like
        Destination passed to ``plt.savefig`` (written at 600 dpi).
    """
    text_size = 12

    plt.figure(figsize=(8,8))
    for model in df_preds.columns:
        model_scores = df_preds.loc[:, model]
        fpr, tpr, _ = roc_curve(y_test, model_scores)
        model_auc = round(auc(fpr, tpr), 3)
        print('{}\n  AUC: {}'.format(model, model_auc))
        plt.plot(fpr, tpr, label=model)

    # Dashed diagonal from (0, 0) to (1, 1) as a reference line.
    plt.plot([0, 1], [0, 1], 'k--')

    plt.xlim([-0.05, 1.0])
    plt.ylim([-0.01, 1.009])
    plt.legend(loc='lower right', fontsize=text_size)
    plt.xlabel('False Positive Rate', fontsize=text_size)
    plt.ylabel('True Positive Rate', fontsize=text_size)
    plt.title('ROC Curve', fontsize=text_size)
    plt.tick_params(axis='both', which='major', labelsize=text_size)

    plt.savefig(filename, dpi=600)
    plt.show()




In [8]:
def plot_bootstrap_roc(m, ci, filename):
    """Plot a mean ROC curve with a shaded band between two bound curves.

    Parameters
    ----------
    m : sequence of 100 values
        Curve drawn in blue and labelled 'ROC Mean'; plotted against
        ``np.linspace(0, 1, 100)``.
    ci : indexable
        ``ci[0]`` and ``ci[1]`` are the two grey bound curves (labelled
        '95% CI'); the area between them is shaded. Each must also hold
        100 values.
    filename : str or path-like
        Destination passed to ``plt.savefig`` (written at 600 dpi).
    """
    text_size = 12
    band_colour = 'grey'
    fpr_grid = np.linspace(0,1,100)

    plt.figure(figsize=(8,8))
    plt.plot(fpr_grid, m, c='blue', label='ROC Mean')

    # Only the first bound carries a label so the legend shows a single entry.
    plt.plot(fpr_grid, ci[0], c=band_colour, label='95% CI')
    plt.plot(fpr_grid, ci[1], c=band_colour)
    plt.fill_between(fpr_grid, ci[0], ci[1], color=band_colour, alpha=0.25)

    # Dashed diagonal from (0, 0) to (1, 1) as a reference line.
    plt.plot([0, 1], [0, 1], 'k--')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.0])
    plt.legend(loc='lower right', fontsize=text_size)
    plt.xlabel('False Positive Rate', fontsize=text_size)
    plt.ylabel('True Positive Rate', fontsize=text_size)
    plt.title('Bootstrap ROC Curve', fontsize=text_size)
    plt.tick_params(axis='both', which='major', labelsize=text_size)

    plt.savefig(filename, dpi=600)
    plt.show()

In [9]:
def plot_multi_precison_vs_recall_curve(
        probas,
        y_test,
        filename):
    """Overlay the precision-recall curves of several models in one figure.

    Parameters
    ----------
    probas : mapping
        Iterated for its keys, which become the legend labels. Each value
        ``probas[key]`` is passed directly to ``precision_recall_curve`` as
        the score vector.
    y_test : array-like
        True labels passed to ``precision_recall_curve``.
    filename : str or path-like
        Destination passed to ``plt.savefig`` (written at 600 dpi).
    """
    text_size = 12

    plt.figure(figsize=(8,8))
    for model_name in probas:
        # The thresholds returned alongside the curve are not needed here.
        precision, recall, _ = precision_recall_curve(y_test, probas[model_name])
        plt.plot(recall, precision, label=model_name)

    # Dashed diagonal from (0, 0) to (1, 1) as a reference line.
    plt.plot([0, 1], [0, 1], 'k--')

    plt.xlim([-0.05, 1.0])
    plt.ylim([-0.01, 1.009])
    plt.legend(loc='lower right', fontsize=text_size)
    plt.xlabel('recall', fontsize=text_size)
    plt.ylabel('precision', fontsize=text_size)
    plt.title('precison_vs_recall_curve', fontsize=text_size)
    plt.tick_params(axis='both', which='major', labelsize=text_size)

    plt.savefig(filename, dpi=600)
    plt.show()