## IPW-Weighted vs. Unweighted AUROC Estimates

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, brier_score_loss, average_precision_score
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.model_selection import cross_val_predict, StratifiedKFold
import matplotlib.pyplot as plt
import seaborn as sns
import glob, os


### Read in estimated probabilities of observing labels

In [None]:
# Load the table of label-observation probability estimates. Later cells read
# its `predictions`, `label` and `pat_enc_csn_id_coded` columns.
# NOTE(review): hard-coded absolute path to a local checkout — adjust when
# running on another machine.
path = '/Users/conorcorbin/repos/er_infection/data/results/ast_models_c/yhats.csv'
df_p_labels = pd.read_csv(path)
df_p_labels.head()

### Calibrate Predictions (Sigmoid and Isotonic)

In [None]:
class IdentityEstimator(LogisticRegression):
    """Pass-through classifier whose scores are simply its inputs.

    ``fit`` is inherited unchanged from ``LogisticRegression``; only the two
    scoring methods are overridden so that they hand back the feature array
    they receive. In this file the class serves as the pre-fitted base
    estimator of ``CalibratedClassifierCV``, which lets the calibrators be
    trained directly on probabilities that were computed elsewhere.
    """

    def __init__(self):
        # No hyper-parameters are exposed; the parent defaults are used.
        super(IdentityEstimator, self).__init__()

    def decision_function(self, input_array):
        """Return ``input_array`` multiplied by one (a copy for NumPy arrays)."""
        return 1 * input_array

    def predict_proba(self, input_array):
        """Return ``input_array`` multiplied by one (a copy for NumPy arrays)."""
        return 1 * input_array

def calibrate_probabilities(predictions, labels, n_splits=10):
    """Recalibrate probabilities with cross-fitted sigmoid and isotonic maps.

    The rows are split into ``n_splits`` stratified folds. For each fold a
    sigmoid and an isotonic calibrator are fit on the other folds and applied
    to the held-out fold, so every calibrated value comes from calibrators
    that never saw that row.

    Args:
        predictions: 1-D array-like (e.g. a pandas Series) of uncalibrated
            probabilities.
        labels: 1-D array-like of class labels aligned with ``predictions``.
        n_splits: Number of stratified folds. Defaults to 10.

    Returns:
        Tuple ``(sigmoid_calibrated, isotonic_calibrated)`` of float arrays
        with one entry per input row, in input order.
    """
    # Convert to plain arrays so that the fold indices below are positional.
    # Indexing a pandas Series with an integer array is label-based and would
    # select the wrong rows (or raise) whenever its index is not 0..n-1.
    X = np.asarray(predictions).reshape(-1, 1)
    y = np.asarray(labels)

    # Every position belongs to exactly one validation fold, so each NaN
    # placeholder is overwritten in the loop below.
    sigmoid_calibrated_predictions = np.full(len(y), np.nan)
    isotonic_calibrated_predictions = np.full(len(y), np.nan)

    # The fitted coefficients are never used (both scoring methods are
    # overridden), but the estimator must be fitted to be accepted as prefit.
    est = IdentityEstimator()
    est.fit(X, y)

    # NOTE(review): recent scikit-learn releases deprecate cv='prefit';
    # confirm against the installed version before upgrading.
    sigmoid = CalibratedClassifierCV(est, cv='prefit', method='sigmoid')
    isotonic = CalibratedClassifierCV(est, cv='prefit', method='isotonic')

    cv = StratifiedKFold(n_splits=n_splits)
    for train_inds, val_inds in cv.split(X, y):
        X_train, y_train = X[train_inds], y[train_inds]
        X_val = X[val_inds]

        isotonic.fit(X_train, y_train)
        isotonic_calibrated_predictions[val_inds] = isotonic.predict_proba(X_val)[:, 1]

        sigmoid.fit(X_train, y_train)
        sigmoid_calibrated_predictions[val_inds] = sigmoid.predict_proba(X_val)[:, 1]

    return sigmoid_calibrated_predictions, isotonic_calibrated_predictions

# Cross-fit both calibrators on the `predictions` column and store the results
# alongside the raw values. calibrate_probabilities returns the sigmoid result
# first and the isotonic result second.
s_preds, i_preds = calibrate_probabilities(df_p_labels['predictions'], df_p_labels['label'])
df_p_labels['s_preds'] = s_preds
df_p_labels['i_preds'] = i_preds

In [None]:
# Preview the first rows of the label-probability table.
df_p_labels.head()

In [None]:
fig, ax = plt.subplots(1, 3, figsize=(24, 8))

# Legend text for each probability column; anything else is the isotonic one.
legend_names = {'predictions': 'Uncalibrated', 's_preds': 'Sigmoid Calibrated'}

sns.set(font_scale=1.5)
for i, preds in enumerate(['predictions', 's_preds' , 'i_preds']):
    # Floor the probabilities at 0.05 (so no weight exceeds 20) and invert
    # them to obtain the inverse probability weights.
    df_p_labels = df_p_labels.assign(clipped_preds=df_p_labels[preds].clip(lower=0.05))
    df_p_labels['weights_%s' % preds] = 1. / df_p_labels['clipped_preds']

    lab = legend_names.get(preds, 'Isotonic Calibrated')

    # All three weight distributions are drawn on every panel; panel j shows
    # the j-th one opaque and the other two faded.
    for j in range(3):
        alpha = [1.0 if k == j else 0.2 for k in range(3)]
        observed = df_p_labels[df_p_labels['label']==1]
        sns.distplot(observed['weights_%s' % preds],
                     kde=False,
                     ax=ax[j],
                     label=lab,
                     hist_kws=dict(alpha=alpha[i]))
        ax[j].set_title("Inverse Probability Weights")
        ax[j].set_xlim([1, 20])
        ax[j].legend()

### Read in Coverage predictions

In [None]:
def load_predictions(base_path, abx_options=None):
    """Load test-set AST classifier predictions into one wide table.

    For each antibiotic option, the first file (in sorted order) matching
    ``*predictions.csv`` inside ``base_path.format(abx=<option>)`` is read.
    Its ``label`` and ``predictions`` columns are renamed to
    ``<option>_label`` and ``<option>_predictions``, and the per-option tables
    are left-joined onto the first option's table on ``anon_id`` and
    ``pat_enc_csn_id_coded``.

    Args:
        base_path: Directory template containing an ``{abx}`` placeholder.
        abx_options: Optional iterable of antibiotic names to load. Defaults
            to the twelve selections used throughout this notebook.

    Returns:
        DataFrame with the two key columns followed by a label and a
        predictions column per option (empty if ``abx_options`` is empty).

    Raises:
        FileNotFoundError: If an option's directory holds no matching file.
    """
    if abx_options is None:
        abx_options = ["Vancomycin",
                       "Ampicillin",
                       "Cefazolin",
                       "Ceftriaxone",
                       "Cefepime",
                       "Zosyn",
                       "Ciprofloxacin",
                       "Meropenem",
                       "Vancomycin_Meropenem",
                       "Vancomycin_Zosyn",
                       "Vancomycin_Cefepime",
                       "Vancomycin_Ceftriaxone"
                       ]
    key_cols = ['anon_id', 'pat_enc_csn_id_coded']

    df = None
    for abx in abx_options:
        path = base_path.format(abx=abx)
        # Sort so the chosen file does not depend on directory listing order.
        matches = sorted(glob.glob(os.path.join(path, '*predictions.csv')))
        if not matches:
            raise FileNotFoundError(
                "No '*predictions.csv' file found in %s" % path)

        df_preds = pd.read_csv(matches[0])
        df_preds = df_preds[key_cols + ['label', 'predictions']]
        df_preds = df_preds.rename(columns={'label' : '%s_label' % abx,
                                            'predictions' : '%s_predictions' % abx})

        # The first option defines the set of rows; later options are
        # left-joined onto it.
        if df is None:
            df = df_preds
        else:
            df = df.merge(df_preds, how='left', on=key_cols)

    return pd.DataFrame() if df is None else df
    
# Directory template for the per-antibiotic test-set predictions; `{abx}` is
# filled in by load_predictions. (The original call passed the misspelled,
# undefined name `baes_path`.)
base_path="/Users/conorcorbin/repos/er_infection/results/ast_models_w_not_infected/testing/{abx}"
df_ipw = load_predictions(base_path)


In [None]:
# Join the per-antibiotic predictions with the label-probability table
# (including any weight columns added to it) on `pat_enc_csn_id_coded`.
# The predictions table is bound to `df_ipw`; the original referenced an
# undefined `df` here.
df_final = (df_ipw
    .merge(df_p_labels, how='left', on='pat_enc_csn_id_coded')
)
df_final.head()

In [None]:
import pdb
def bootstrapped_performance(labels, predictions, weights, n_bootstraps=1000):
    """Bootstrap the unweighted and the sample-weighted AUROC.

    Rows are resampled with replacement ``n_bootstraps`` times using NumPy's
    global random state. For each resample the AUROC is computed once without
    weights and once with ``weights`` passed as ``sample_weight``.

    Args:
        labels: 1-D array-like of binary labels.
        predictions: 1-D array-like of scores aligned with ``labels``.
        weights: 1-D array-like of sample weights aligned with ``labels``.
        n_bootstraps: Number of bootstrap resamples. Defaults to 1000.

    Returns:
        Tuple ``(auroc, auroc_ipw, prev)`` of strings. The first two are
        formatted as ``"mean [2.5th percentile, 97.5th percentile]"`` over the
        resamples; ``prev`` is the mean of ``labels`` rounded to two decimals.

    Raises:
        ValueError: Propagated from ``roc_auc_score``, e.g. when a resample
            contains only one class.
    """
    # Plain arrays guarantee positional indexing with the resampled indices.
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)
    weights = np.asarray(weights)

    def _summarize(scores):
        # Mean with a 95% percentile interval, formatted for the results table.
        return "%.2f [%.2f, %.2f]" % (np.mean(scores),
                                      np.percentile(scores, 2.5),
                                      np.percentile(scores, 97.5))

    n = len(labels)
    aurocs, aurocs_ipw = [], []
    for _ in range(n_bootstraps):
        inds_boot = np.random.choice(n, size=n, replace=True)
        labels_b = labels[inds_boot]
        predictions_b = predictions[inds_boot]
        weights_b = weights[inds_boot]
        aurocs.append(roc_auc_score(labels_b, predictions_b))
        aurocs_ipw.append(roc_auc_score(labels_b, predictions_b, sample_weight=weights_b))

    auroc = _summarize(aurocs)
    auroc_ipw = _summarize(aurocs_ipw)
    prev = str(round(np.average(labels), 2))

    return auroc, auroc_ipw, prev


# Results table: one row per antibiotic selection, with the bootstrapped AUROC
# computed without weights and with the uncalibrated inverse probability weights.
df_test_results = pd.DataFrame()
abx_options = [
    "Vancomycin",
    "Ampicillin",
    "Cefazolin",
    "Ceftriaxone",
    "Cefepime",
    "Zosyn",
    "Ciprofloxacin",
    "Meropenem",
    "Vancomycin_Meropenem",
    "Vancomycin_Zosyn",
    "Vancomycin_Cefepime",
    "Vancomycin_Ceftriaxone",
]

df_test_results['Antibiotic Selection'] = list(abx_options)

# NOTE(review): `base_path` and `best_model_classes` are not read anywhere in
# the visible cells; `prevs` is collected but never added to the table.
base_path = "/Users/conorcorbin/repos/er_infection/results/ast_models_bucket1/testing/{abx}"
best_model_classes = []
prevs, aurocs_ipw, aurocs = [], [], []
for abx in abx_options:
    label_col = '{}_label'.format(abx)
    prediction_col = '{}_predictions'.format(abx)
    auroc, auroc_ipw, prev = bootstrapped_performance(
        df_final[label_col].values,
        df_final[prediction_col].values,
        df_final['weights_predictions'].values,
    )
    aurocs.append(auroc)
    aurocs_ipw.append(auroc_ipw)
    prevs.append(prev)

df_test_results['AUROC'] = aurocs
df_test_results['AUROC IPW'] = aurocs_ipw

In [None]:
# Display the AUROC results table.
df_test_results

In [None]:
# Write the results table to disk without the row index. `index` is a boolean
# flag, so pass False explicitly rather than relying on None being falsy.
df_test_results.to_csv('IPW_weights_results_table.csv', index=False)

In [None]:
# Rebuild the merged table so that it carries every weight column currently in
# df_p_labels. The predictions table is bound to `df_ipw`; the original
# referenced an undefined `df` here.
df_final = (df_ipw
    .merge(df_p_labels, how='left', on='pat_enc_csn_id_coded')
)
abx_options = ["Vancomycin",
               "Ampicillin",
               "Cefazolin",
               "Ceftriaxone",
               "Cefepime",
               "Zosyn",
               "Ciprofloxacin",
               "Meropenem",
               "Vancomycin_Meropenem",
               "Vancomycin_Zosyn",
               "Vancomycin_Cefepime",
               "Vancomycin_Ceftriaxone"
               ]

# Result column -> df_final column holding the sample weights (None means
# unweighted). The weights are the clipped inverse probabilities stored in the
# `weights_*` columns. The original passed the raw probability columns
# (`predictions`, `s_preds`, `i_preds`) as weights, which weights by the
# probability itself instead of its inverse.
weight_columns = [
    ('Unweighted', None),
    ('Weighted Uncalibrated', 'weights_predictions'),
    ('Weighted Sigmoid Calibrated', 'weights_s_preds'),
    ('Weighted Isotonic Calibrated', 'weights_i_preds'),
]

# Compute AUROC for each antibiotic weighted by each of the three weights
aurocs = {'Antibiotic': []}
for result_name, _ in weight_columns:
    aurocs[result_name] = []

for abx in abx_options:
    aurocs['Antibiotic'].append(abx)
    predictions = df_final['%s_predictions' % abx]
    labels = df_final['%s_label' % abx]

    for result_name, weight_col in weight_columns:
        # sample_weight=None gives every row the same weight.
        weights = None if weight_col is None else df_final[weight_col]
        auroc = round(roc_auc_score(labels, predictions, sample_weight=weights), 2)
        aurocs[result_name].append(auroc)

df_results = pd.DataFrame(data=aurocs)
    

In [None]:
# Display the per-antibiotic AUROC table for each weighting scheme.
df_results

In [None]:
import pdb
from tqdm import tqdm
def bootstrapped_performance(df, measure, n_bootstraps=1000):
    """Bootstrap a performance measure under each weighting scheme.

    NOTE: this definition replaces the three-argument function of the same
    name defined earlier in the notebook.

    The rows of ``df`` are resampled with replacement ``n_bootstraps`` times.
    For every resample and every antibiotic in the module-level
    ``abx_options``, ``measure`` is evaluated unweighted and with each of the
    three inverse-probability-weight columns as ``sample_weight``.

    Args:
        df: DataFrame holding ``<abx>_label`` and ``<abx>_predictions``
            columns for each antibiotic, plus ``weights_predictions``,
            ``weights_s_preds`` and ``weights_i_preds``.
        measure: Callable ``measure(labels, predictions, sample_weight=...)``
            returning a number, e.g. ``roc_auc_score``.
        n_bootstraps: Number of bootstrap resamples. Defaults to 1000.

    Returns:
        Long-format DataFrame with columns ``index``, ``Antibiotic``,
        ``Weighting`` and ``score``: one row per resample, antibiotic and
        weighting scheme.
    """
    # Result column -> weight column (None means unweighted). The original
    # passed the raw probability columns as weights; inverse probability
    # weighting needs the `weights_*` columns.
    weightings = [
        ('Unweighted', None),
        ('Weighted Uncalibrated', 'weights_predictions'),
        ('Weighted Sigmoid Calibrated', 'weights_s_preds'),
        ('Weighted Isotonic Calibrated', 'weights_i_preds'),
    ]
    value_vars = [name for name, _ in weightings]

    # Collect the per-resample tables and concatenate once at the end;
    # concatenating inside the loop copies the accumulated frame every time.
    replicates = []
    for _ in tqdm(range(n_bootstraps)):
        df_sampled = df.sample(n=len(df), replace=True)

        meas = {'Antibiotic': []}
        for name in value_vars:
            meas[name] = []

        for abx in abx_options:
            meas['Antibiotic'].append(abx)
            predictions = df_sampled['%s_predictions' % abx]
            labels = df_sampled['%s_label' % abx]

            for name, weight_col in weightings:
                # sample_weight=None gives every row the same weight.
                weights = None if weight_col is None else df_sampled[weight_col]
                meas[name].append(measure(labels, predictions, sample_weight=weights))

        replicates.append(pd.DataFrame(data=meas))

    if replicates:
        df_results = pd.concat(replicates)
    else:
        # pd.concat rejects an empty list; return an empty, well-formed table.
        df_results = pd.DataFrame(columns=['Antibiotic'] + value_vars)

    df_results = (df_results
        .melt(id_vars=['Antibiotic'],
              value_vars=value_vars,
              var_name='Weighting',
              value_name='score')
        .reset_index()
    )
    return df_results
        

### Bootstrap AUROC

In [None]:
# Bootstrap the AUROC under every weighting scheme, then draw one box per
# (antibiotic, weighting) pair.
df_auroc = bootstrapped_performance(df_final, roc_auc_score)
fig, ax = plt.subplots(1, 1, figsize=(32, 8))

sns.set(style='whitegrid', font_scale=2.0)
sns.boxplot(
    data=df_auroc,
    x='Antibiotic',
    y='score',
    hue='Weighting',
    fliersize=0,  # hide outlier markers
    ax=ax,
)
plt.xticks(rotation = 45)
ax.set_ylabel('AUROC')

### Bootstrap Average Precision

In [None]:
# Bootstrap the average precision under every weighting scheme, then draw one
# box per (antibiotic, weighting) pair.
df_ap = bootstrapped_performance(df_final, average_precision_score)
fig, ax = plt.subplots(1, 1, figsize=(32, 8))

sns.set(style='whitegrid', font_scale=2.0)
sns.boxplot(
    data=df_ap,
    x='Antibiotic',
    y='score',
    hue='Weighting',
    fliersize=0,  # hide outlier markers
    ax=ax,
)
plt.xticks(rotation = 45)
ax.set_ylabel('Average Precision')

### Bootstrap Brier Score


In [None]:
# Bootstrap the Brier score under every weighting scheme and draw one box per
# (antibiotic, weighting) pair. The results get their own name so that they do
# not overwrite the average-precision results held in `df_ap`.
fig, ax = plt.subplots(1, 1, figsize=(32, 8))
df_brier = bootstrapped_performance(df_final, brier_score_loss)

sns.set(style='whitegrid', font_scale=2.0)
sns.boxplot(x='Antibiotic',
            y='score',
            hue='Weighting',
            data=df_brier,
            fliersize=0,  # hide outlier markers
            ax=ax
            )
plt.xticks(rotation = 45)
ax.set_ylabel('Brier Score')