In [None]:
# import packages
import os
import re
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
#tf.enable_eager_execution()
import tensorflow.keras as keras
from sklearn import metrics
from sklearn.calibration import calibration_curve
from sklearn.utils import resample
from tensorflow.python.keras import backend
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops

In [None]:
# import test dataset
# Path to the test TFRecord file. Set the TEST_TFRECORD_PATH environment
# variable to point the notebook at a different copy of the file without
# editing this cell; the original location remains the default.
test_input = os.environ.get(
    "TEST_TFRECORD_PATH",
    "/home/jupyter/datasets/training_data/data_before_24hrs_icu/data_grouped_HADM_ID/padded_arrays/all_events_test.tfrecord",
)

In [None]:
# check test dataset for number of records
# Iterates over every serialized record in the file once, purely to count them.
# run_inference reads `test_records` to size the prediction steps and to know
# how many labels to pull from the dataset.
test_records = sum(1 for _ in tf.python_io.tf_record_iterator(test_input))
print(test_records)

In [None]:
# assign file path to tfrecords dataset
# `test_dataset` yields raw serialized examples; the parse functions defined
# further down are applied to it with `.map(...)` inside run_inference.
test_filenames = [test_input]
test_dataset = tf.data.TFRecordDataset(test_filenames)

In [None]:
# Create Parse Functions

# Schema used by tf.parse_single_example for every record in the TFRecord file.
# Two scalar int64 fields hold the labels; 'feature1' .. 'feature9' are
# variable-length int64 sequences (one per event source, see the parse
# functions for which key maps to which event type).
feature_description = {
    'HOSPITAL_EXPIRE_FLAG': tf.FixedLenFeature([], dtype=tf.int64),
    'LOS': tf.FixedLenFeature([], dtype=tf.int64),
}
feature_description.update(
    ('feature%d' % feature_idx, tf.VarLenFeature(dtype=tf.int64))
    for feature_idx in range(1, 10)
)

# Keys of the nine variable-length event features, in the order the all-events
# models receive them: chart, input (CV), input (MV), lab, microbiology, note,
# output, prescription and procedure events.
_EVENT_FEATURE_KEYS = tuple('feature%d' % feature_idx for feature_idx in range(1, 10))


def _parse_example(example_proto, label_key, feature_keys):
    """Parse one serialized example into int32 event tensors and an int32 label.

    Args:
        example_proto: Serialized tf.Example, as yielded by the TFRecord dataset.
        label_key: Name of the scalar label field in `feature_description`
            ('HOSPITAL_EXPIRE_FLAG' or 'LOS').
        feature_keys: Iterable of variable-length feature names to extract.

    Returns:
        `(events, label)` where `events` is a tuple holding one dense int32
        tensor per entry of `feature_keys` (same order) and `label` is the
        label cast to int32.
    """
    parsed = tf.parse_single_example(example_proto, feature_description)
    label = tf.cast(parsed[label_key], dtype='int32')
    events = tuple(
        tf.cast(tf.sparse.to_dense(parsed[key]), dtype='int32')
        for key in feature_keys
    )
    return events, label


# Parse function for All Events Mortality
def _parse_function_all_events_mortality(example_proto):
    """Return `((nine event tensors), HOSPITAL_EXPIRE_FLAG label)` for one example."""
    return _parse_example(example_proto, 'HOSPITAL_EXPIRE_FLAG', _EVENT_FEATURE_KEYS)


# Parse function for Chart Events Mortality
def _parse_function_ch_events_mortality(example_proto):
    """Return `(chart-events tensor, HOSPITAL_EXPIRE_FLAG label)` for one example.

    Only 'feature1' (chart events) is decoded; the other eight features are
    not needed by the chart-events model and are no longer materialized.
    """
    (ch_events,), label = _parse_example(example_proto, 'HOSPITAL_EXPIRE_FLAG', ('feature1',))
    return ch_events, label


# Parse function for All Events Length of Stay
def _parse_function_all_events_LOS(example_proto):
    """Return `((nine event tensors), LOS label)` for one example."""
    return _parse_example(example_proto, 'LOS', _EVENT_FEATURE_KEYS)


# Parse function for Chart Events Length of Stay
def _parse_function_ch_events_LOS(example_proto):
    """Return `(chart-events tensor, LOS label)` for one example.

    Only 'feature1' (chart events) is decoded; the other eight features are
    not needed by the chart-events model and are no longer materialized.
    """
    (ch_events,), label = _parse_example(example_proto, 'LOS', ('feature1',))
    return ch_events, label

In [None]:
# Define custom layer function
def call(inputs, mask=None):
    """Average `inputs` over the steps axis (axis 1), ignoring masked-out steps.

    Args:
        inputs: Tensor to pool. The mask is reshaped to `[-1, steps, 1]` before
            being multiplied in, so `inputs` is presumably rank 3
            (batch, steps, features) -- TODO confirm against the model.
        mask: Optional tensor marking valid steps. It is cast to the backend
            float type; `None` means every step is averaged.

    Returns:
        Without a mask, the plain mean of `inputs` over axis 1. With a mask,
        the sum of the masked inputs over axis 1 divided by the number of
        unmasked steps (plus `backend.epsilon()` to avoid division by zero).
    """
    steps_axis = 1
    if mask is None:
        return backend.mean(inputs, axis=steps_axis)

    mask = math_ops.cast(mask, backend.floatx())
    input_shape = inputs.shape.as_list()
    broadcast_shape = [-1, input_shape[steps_axis], 1]
    # array_ops is imported in the notebook's import cell; the masked branch
    # previously failed with NameError because that import was missing.
    mask = array_ops.reshape(mask, broadcast_shape)
    masked_inputs = inputs * mask
    valid_steps = math_ops.reduce_sum(mask, axis=steps_axis)
    return backend.sum(masked_inputs, axis=steps_axis) / (valid_steps + backend.epsilon())

In [None]:
# Load the models that need to be evaluated.
# The hyper-tuned models are loaded from the local paths below.
# If you used AI Platform to hypertune your models then download the models and modify the paths below.

def _load_compiled_model(model_dir):
    """Load a Keras model from the SavedModel directory `model_dir` and compile it.

    The model is compiled with a default Adam optimizer and binary
    cross-entropy loss, the same settings used for all four models here.
    """
    loaded_model = tf.contrib.saved_model.load_keras_model(model_dir)
    loaded_model.compile(optimizer=keras.optimizers.Adam(), loss= 'binary_crossentropy')
    return loaded_model

# Mortality model trained using AllSources/All Events
model_path = "/home/jupyter/output/model/hypertuned_validation/Mortality_all_events/model/1578250546/"
all_events_mortality = _load_compiled_model(model_path)

# Mortality model trained using ChartEvents
model_path = "/home/jupyter/output/model/hypertuned_validation2/Mortality_ch_events/model/1578380697/"
ch_events_mortality = _load_compiled_model(model_path)

# LOS model trained using AllSources/All Events
model_path = "/home/jupyter/output/model/hypertuned_validation/LOS_all_events/model/1578262133/"
all_events_LOS = _load_compiled_model(model_path)

# LOS model trained using ChartEvents
model_path = "/home/jupyter/output/model/hypertuned_validation/LOS_ch_events/model/1578224943/"
ch_events_LOS = _load_compiled_model(model_path)

In [None]:

# Define function to run inference on test set
def run_inference(model, _parse_function, batch_size=128):
    """Score every test record with `model` and pair each score with its label.

    Reads the notebook-level globals `test_dataset` (raw TFRecord dataset) and
    `test_records` (number of records in it).

    Args:
        model: Compiled Keras model whose `predict` accepts the batched dataset.
        _parse_function: Maps one serialized example to `(features, label)`;
            must produce the inputs `model` expects.
        batch_size: Records per prediction batch (default 128).

    Returns:
        pandas.DataFrame with one row per test record and the columns
        `Predictions` (model output rounded to 3 decimals), `pred_1`
        (`Predictions` rounded to the nearest integer) and `actual` (the label
        produced by `_parse_function`).
    """
    parsed_dataset = test_dataset.map(_parse_function, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # Ceiling division. The dataset is batched *before* being repeated, so the
    # final batch of an epoch is partial and exactly `test_records` predictions
    # come back. The previous `int(n / batch) + 1` ran one batch too many when
    # `test_records` was an exact multiple of `batch_size`, which made the
    # `actual` column assignment below fail on a length mismatch.
    steps = (test_records + batch_size - 1) // batch_size
    results = model.predict(parsed_dataset.batch(batch_size).repeat(),
                            steps=steps,
                            verbose=1)

    # Read the labels back in dataset order. Only the label component is
    # fetched; each `sess.run` still advances the iterator by one record.
    label_tensor = parsed_dataset.make_one_shot_iterator().get_next()[1]
    with tf.Session() as sess:  # closed on exit instead of being leaked per call
        output_labels = [sess.run(label_tensor) for _ in range(test_records)]

    result = pd.DataFrame(results, columns=['Predictions'])
    result['Predictions'] = result['Predictions'].round(3)
    result['pred_1'] = result['Predictions'].round()
    result['actual'] = output_labels
    return result

In [None]:
# Run each of the four loaded models over the test set with its matching parse
# function. Each call returns a DataFrame with the columns
# Predictions / pred_1 / actual that the plotting and bootstrap cells read.
result_all_mortality = run_inference(model=all_events_mortality, _parse_function=_parse_function_all_events_mortality)
result_ch_mortality = run_inference(model=ch_events_mortality, _parse_function=_parse_function_ch_events_mortality)
result_all_LOS = run_inference(model=all_events_LOS, _parse_function=_parse_function_all_events_LOS)
result_ch_LOS = run_inference(model=ch_events_LOS, _parse_function=_parse_function_ch_events_LOS)

In [None]:
# function to create AUC-ROC, PR-CURVE, Calibration Curve
def print_roc_pr_calibration_curve(model, _parse_function):
    """Run inference for `model` and draw its ROC, precision-recall and calibration plots.

    Args:
        model: Model passed through to `run_inference`.
        _parse_function: Parse function passed through to `run_inference`.

    Returns:
        Whatever `plt.show()` returns (the three figures are rendered as a
        side effect).
    """
    result = run_inference(model = model, _parse_function=_parse_function)
    y_ACTUAL = result['actual']
    scores_prob = result['Predictions']
    yhat = result['pred_1']

    # --- ROC curve ---
    fpr, tpr, _ = metrics.roc_curve(y_ACTUAL, scores_prob, pos_label=1)
    roc_auc = metrics.auc(fpr, tpr)
    plt.figure(figsize=(8,6))
    plt.title('Receiver Operating Characteristic',fontdict={"size":20})
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate or Sensitivity', fontdict={"size":15})
    plt.xlabel('False Positive Rate or 1-Specificity',fontdict={"size":15})
    plt.plot(fpr, tpr, label = 'AUC = %0.2f' % roc_auc)
    plt.plot([0, 1], [0, 1],linestyle='--',color='red', label = 'No Skill Line')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125), fontsize = 12)

    # --- Precision-recall curve ---
    precision, recall, _ = metrics.precision_recall_curve(y_ACTUAL, scores_prob, pos_label=1)
    prc_auc = metrics.auc(recall,precision)
    f1 = metrics.f1_score(y_ACTUAL, yhat)
    # Share of positive labels; a no-skill classifier's precision equals it.
    positive_class_ratio = round(sum(y_ACTUAL)/(len(y_ACTUAL)),2)
    plt.figure(figsize=(8,6))
    plt.title('Precision Recall Curve',fontdict={"size":20})
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('Precision',fontdict={"size":15})
    plt.xlabel('Recall',fontdict={"size":15})
    plt.plot(recall, precision, label = 'F1=%.2f  AUC=%.2f' % (f1, prc_auc))
    plt.plot([0, 1], [positive_class_ratio, positive_class_ratio],linestyle='--',color='red', label = 'No Skill Line')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)

    # --- Calibration curve ---
    # calibration_curve returns (fraction of positives, mean predicted value)
    # in that order. The mean predicted value belongs on the x axis to match
    # the axis labels and the diagonal reference line; the two were previously
    # plotted the wrong way round.
    frac_positives, mean_predicted = calibration_curve(y_ACTUAL, scores_prob, n_bins = 10, strategy='uniform')
    plt.figure(figsize=(8,6))
    plt.title('Calibration Curve',fontdict={"size":20})
    plt.plot(mean_predicted, frac_positives, marker = 'o', color = 'orange')
    plt.plot([0, 1], [0, 1],linestyle='--',color='black', label = 'Perfectly Calibrated')
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)
    plt.ylabel('Fraction of Positives',fontdict={"size":15})
    plt.xlabel('Mean Predicted value',fontdict={"size":15})
    return plt.show()

### Plot AUC-ROC, PR-AUC, and Calibration Curves

In [None]:
# Plotting AUROC Curve - Calculatae Metrics
from sklearn import metrics
def calc_aucroc_data(result):
    """Compute the ROC curve and its area for one results frame.

    Args:
        result: Mapping/DataFrame with an 'actual' column (true labels) and a
            'Predictions' column (predicted scores), as built by run_inference.

    Returns:
        `(fpr, tpr, thresholds, roc_auc)`: the three outputs of
        `metrics.roc_curve` (positive label 1) followed by the area under
        that curve.
    """
    false_pos_rate, true_pos_rate, cutoffs = metrics.roc_curve(
        result['actual'], result['Predictions'], pos_label=1
    )
    area = metrics.auc(false_pos_rate, true_pos_rate)
    return false_pos_rate, true_pos_rate, cutoffs, area

def calc_aucpr_data(result):
    """Compute precision-recall curve data and summary numbers for one results frame.

    Args:
        result: Mapping/DataFrame with 'actual' (true labels), 'Predictions'
            (predicted scores) and 'pred_1' (scores rounded to 0/1), as built
            by run_inference.

    Returns:
        `(recall, precision, f1, prc_auc, mortality_ratio)` where `recall` and
        `precision` come from `metrics.precision_recall_curve`, `f1` is the F1
        score of the rounded predictions, `prc_auc` is the area under the
        precision-recall curve and `mortality_ratio` is the share of positive
        labels rounded to 2 decimals (used as the no-skill precision line).
    """
    y_actual = result['actual']
    scores_prob = result['Predictions']
    yhat = result['pred_1']
    precision, recall, _ = metrics.precision_recall_curve(y_actual, scores_prob, pos_label=1)
    prc_auc = metrics.auc(recall, precision)
    f1 = metrics.f1_score(y_actual, yhat)
    # The average-precision value that used to be computed here was derived
    # from the thresholded labels and never returned, so it has been dropped.
    mortality_ratio = round(sum(y_actual) / len(y_actual), 2)
    return recall, precision, f1, prc_auc, mortality_ratio

# ROC / PR curve data for the two mortality models
# (`_am`: all-events model, `_cm`: chart-events model).
fpr_am, tpr_am, thresholds_am, roc_auc_am = calc_aucroc_data(result_all_mortality)
fpr_cm, tpr_cm, thresholds_cm, roc_auc_cm = calc_aucroc_data(result_ch_mortality)
recall_am, precision_am, f1_am, prc_auc_am, mortality_ratio = calc_aucpr_data(result_all_mortality)
recall_cm, precision_cm, f1_cm, prc_auc_cm, mortality_ratio = calc_aucpr_data(result_ch_mortality)

fig, (ax1, ax2) = plt.subplots(1, 2,figsize=(16,6))
fig.suptitle('Comparision of AUC-ROC and PR-AUC Curves', fontsize=20, y = 1.03)

# Left panel: ROC curves
ax1.set_title('Receiver Operating Characteristic',fontdict={"size":20})
ax1.set_xlim([0, 1])
ax1.set_ylim([0, 1])
ax1.set_ylabel('True Positive Rate or Sensitivity', fontdict={"size":15})
ax1.set_xlabel('False Positive Rate or 1-Specificity',fontdict={"size":15})
ax1.plot(fpr_am, tpr_am, label = 'AUC-ROC IHM-AS = %0.2f' % roc_auc_am)
ax1.plot(fpr_cm, tpr_cm, label = 'AUC-ROC IHM-CS = %0.2f' % roc_auc_cm)
ax1.plot([0, 1], [0, 1],linestyle='--',color='red', label = 'No Skill Line')
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125), fontsize = 12)

# Right panel: precision-recall curves
ax2.set_title('Precision Recall Curve',fontdict={"size":20})
ax2.set_xlim([0, 1])
ax2.set_ylim([0, 1])
ax2.set_ylabel('Precision',fontdict={"size":15})
ax2.set_xlabel('Recall',fontdict={"size":15})
ax2.plot(recall_am, precision_am, label = 'PR-AUC IHM-AS=%.2f' % (prc_auc_am))
ax2.plot(recall_cm, precision_cm, label = 'PR-AUC IHM-CS=%.2f' % (prc_auc_cm))
# A no-skill classifier's precision equals the share of positive labels.
positive_class_ratio = mortality_ratio
ax2.plot([0, 1], [positive_class_ratio, positive_class_ratio],linestyle='--',color='red', label = 'No Skill Line')
ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)
# plt.show() renders the figure with the notebook's inline backend;
# fig.show() only warns there because the backend has no GUI.
plt.show()


# Calibration curves
# calibration_curve returns (fraction of positives, mean predicted value) in
# that order. Unpack so that x_* is the mean predicted value and y_* the
# fraction of positives, matching the axis labels and the diagonal reference
# line (the two were previously plotted the wrong way round).
y_am, x_am = calibration_curve(result_all_mortality['actual'], result_all_mortality['Predictions'], n_bins = 10, strategy='uniform')
y_cm, x_cm = calibration_curve(result_ch_mortality['actual'], result_ch_mortality['Predictions'], n_bins = 10, strategy='uniform')

fig, (ax1, ax2) = plt.subplots(1, 2,figsize=(16,6))
fig.suptitle('Comparision of Calibration Plots (Reliability Curves)', fontsize=20)
ax1.plot(x_am, y_am, marker = 'o', color = 'tab:blue', label = 'IHM-AS')
ax1.plot([0, 1], [0, 1],linestyle='--',color='black', label = 'Perfectly Calibrated')
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)
ax1.set_ylabel('Fraction of Positives',fontdict={"size":15})
ax1.set_xlabel('Mean Predicted value',fontdict={"size":15})

ax2.plot(x_cm, y_cm, marker = 'o', color = 'tab:orange', label = 'IHM-CS')
ax2.plot([0, 1], [0, 1],linestyle='--',color='black', label = 'Perfectly Calibrated')
ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)
ax2.set_ylabel('Fraction of Positives',fontdict={"size":15})
ax2.set_xlabel('Mean Predicted value',fontdict={"size":15})

plt.show()

In [None]:
# ROC-AUC then PR-AUC for the all-events (_am) and chart-events (_cm) models,
# using whichever values the most recently executed plotting cell assigned.
print(roc_auc_am)
print(roc_auc_cm)
print(prc_auc_am)
print(prc_auc_cm)

In [None]:
# Plotting AUROC Curve - Calculatae Metrics
from sklearn import metrics
def calc_aucroc_data(result):
    """Compute the ROC curve and its area for one results frame.

    A helper with this name is also defined in the mortality plotting cell
    earlier in the notebook; this cell re-declares it.

    Args:
        result: Mapping/DataFrame with an 'actual' column (true labels) and a
            'Predictions' column (predicted scores), as built by run_inference.

    Returns:
        `(fpr, tpr, thresholds, roc_auc)`: the three outputs of
        `metrics.roc_curve` (positive label 1) followed by the area under
        that curve.
    """
    false_pos_rate, true_pos_rate, cutoffs = metrics.roc_curve(
        result['actual'], result['Predictions'], pos_label=1
    )
    area = metrics.auc(false_pos_rate, true_pos_rate)
    return false_pos_rate, true_pos_rate, cutoffs, area

def calc_aucpr_data(result):
    """Compute precision-recall curve data and summary numbers for one results frame.

    A helper with this name is also defined in the mortality plotting cell
    earlier in the notebook; this cell re-declares it.

    Args:
        result: Mapping/DataFrame with 'actual' (true labels), 'Predictions'
            (predicted scores) and 'pred_1' (scores rounded to 0/1), as built
            by run_inference.

    Returns:
        `(recall, precision, f1, prc_auc, mortality_ratio)` where `recall` and
        `precision` come from `metrics.precision_recall_curve`, `f1` is the F1
        score of the rounded predictions, `prc_auc` is the area under the
        precision-recall curve and `mortality_ratio` is the share of positive
        labels rounded to 2 decimals (used as the no-skill precision line).
    """
    y_actual = result['actual']
    scores_prob = result['Predictions']
    yhat = result['pred_1']
    precision, recall, _ = metrics.precision_recall_curve(y_actual, scores_prob, pos_label=1)
    prc_auc = metrics.auc(recall, precision)
    f1 = metrics.f1_score(y_actual, yhat)
    # The average-precision value that used to be computed here was derived
    # from the thresholded labels and never returned, so it has been dropped.
    mortality_ratio = round(sum(y_actual) / len(y_actual), 2)
    return recall, precision, f1, prc_auc, mortality_ratio

# ROC / PR curve data for the two length-of-stay models
# (`_am`: all-events model, `_cm`: chart-events model). These names are shared
# with the mortality plotting cell and are overwritten here.
fpr_am, tpr_am, thresholds_am, roc_auc_am = calc_aucroc_data(result_all_LOS)
fpr_cm, tpr_cm, thresholds_cm, roc_auc_cm = calc_aucroc_data(result_ch_LOS)
recall_am, precision_am, f1_am, prc_auc_am, mortality_ratio = calc_aucpr_data(result_all_LOS)
recall_cm, precision_cm, f1_cm, prc_auc_cm, mortality_ratio = calc_aucpr_data(result_ch_LOS)

fig, (ax1, ax2) = plt.subplots(1, 2,figsize=(16,6))
fig.suptitle('Comparision of AUC-ROC and PR-AUC Curves', fontsize=20, y = 1.03)

# Left panel: ROC curves
ax1.set_title('Receiver Operating Characteristic',fontdict={"size":20})
ax1.set_xlim([0, 1])
ax1.set_ylim([0, 1])
ax1.set_ylabel('True Positive Rate or Sensitivity', fontdict={"size":15})
ax1.set_xlabel('False Positive Rate or 1-Specificity',fontdict={"size":15})
ax1.plot(fpr_am, tpr_am, label = 'AUC-ROC LOS-AS = %0.2f' % roc_auc_am)
ax1.plot(fpr_cm, tpr_cm, label = 'AUC-ROC LOS-CS = %0.2f' % roc_auc_cm)
ax1.plot([0, 1], [0, 1],linestyle='--',color='red', label = 'No Skill Line')
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125), fontsize = 12)

# Right panel: precision-recall curves
ax2.set_title('Precision Recall Curve',fontdict={"size":20})
ax2.set_xlim([0, 1])
ax2.set_ylim([0, 1])
ax2.set_ylabel('Precision',fontdict={"size":15})
ax2.set_xlabel('Recall',fontdict={"size":15})
ax2.plot(recall_am, precision_am, label = 'PR-AUC LOS-AS=%.2f' % (prc_auc_am))
ax2.plot(recall_cm, precision_cm, label = 'PR-AUC LOS-CS=%.2f' % (prc_auc_cm))
# Despite its name, `mortality_ratio` here is the share of positive LOS labels;
# a no-skill classifier's precision equals it.
positive_class_ratio = mortality_ratio
ax2.plot([0, 1], [positive_class_ratio, positive_class_ratio],linestyle='--',color='red', label = 'No Skill Line')
ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)
# plt.show() renders the figure with the notebook's inline backend;
# fig.show() only warns there because the backend has no GUI.
plt.show()


# Calibration curves
# calibration_curve returns (fraction of positives, mean predicted value) in
# that order. Unpack so that x_* is the mean predicted value and y_* the
# fraction of positives, matching the axis labels and the diagonal reference
# line (the two were previously plotted the wrong way round).
y_am, x_am = calibration_curve(result_all_LOS['actual'], result_all_LOS['Predictions'], n_bins = 10, strategy='uniform')
y_cm, x_cm = calibration_curve(result_ch_LOS['actual'], result_ch_LOS['Predictions'], n_bins = 10, strategy='uniform')

fig, (ax1, ax2) = plt.subplots(1, 2,figsize=(16,6))
fig.suptitle('Comparision of Calibration Plots (Reliability Curves)', fontsize=20)
ax1.plot(x_am, y_am, marker = 'o', color = 'tab:blue', label = 'LOS-AS')
ax1.plot([0, 1], [0, 1],linestyle='--',color='black', label = 'Perfectly Calibrated')
ax1.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)
ax1.set_ylabel('Fraction of Positives',fontdict={"size":15})
ax1.set_xlabel('Mean Predicted value',fontdict={"size":15})

ax2.plot(x_cm, y_cm, marker = 'o', color = 'tab:orange', label = 'LOS-CS')
ax2.plot([0, 1], [0, 1],linestyle='--',color='black', label = 'Perfectly Calibrated')
ax2.legend(loc='upper center', bbox_to_anchor=(0.5, -0.125),fontsize = 12)
ax2.set_ylabel('Fraction of Positives',fontdict={"size":15})
ax2.set_xlabel('Mean Predicted value',fontdict={"size":15})

plt.show()

In [None]:
# ROC-AUC then PR-AUC for the all-events (_am) and chart-events (_cm) models,
# using whichever values the most recently executed plotting cell assigned.
print(roc_auc_am)
print(roc_auc_cm)
print(prc_auc_am)
print(prc_auc_cm)

In [None]:
def calculate_roc_auc(result):
    """Return `(roc_auc, prc_auc)` for one results frame.

    Uses the `metrics` module imported in the notebook's import cell.

    Args:
        result: Mapping/DataFrame with an 'actual' column (true labels) and a
            'Predictions' column (predicted scores).

    Returns:
        Tuple of the area under the ROC curve and the area under the
        precision-recall curve, both with 1 as the positive label.
    """
    y_actual = result['actual']
    scores_prob = result['Predictions']

    fpr, tpr, _ = metrics.roc_curve(y_actual, scores_prob, pos_label=1)
    precision, recall, _ = metrics.precision_recall_curve(y_actual, scores_prob, pos_label=1)
    return metrics.auc(fpr, tpr), metrics.auc(recall, precision)

In [None]:
def bootstrapped_AUC(result, n_iter=10000):
    """Bootstrap the ROC-AUC and PR-AUC of one results frame.

    Uses `resample` and `metrics` imported in the notebook's import cell.

    Args:
        result: DataFrame with an 'actual' column (true labels) and a
            'Predictions' column (predicted scores).
        n_iter: Number of bootstrap resamples to draw (default 10000).

    Returns:
        `(roc_auc, prc_auc)`: two lists of length `n_iter` holding the ROC-AUC
        and PR-AUC of each resample, in iteration order.
    """
    roc_auc = []
    prc_auc = []
    n_rows = len(result)

    for seed in range(n_iter):
        # The iteration number doubles as the random seed, so repeated runs
        # reproduce the same resamples.
        result_sample = resample(result, n_samples=n_rows, random_state=seed)
        y_actual = result_sample['actual']
        scores_prob = result_sample['Predictions']

        fpr, tpr, _ = metrics.roc_curve(y_actual, scores_prob, pos_label=1)
        roc_auc.append(metrics.auc(fpr, tpr))

        precision, recall, _ = metrics.precision_recall_curve(y_actual, scores_prob, pos_label=1)
        prc_auc.append(metrics.auc(recall, precision))

    return roc_auc, prc_auc

In [None]:
%%time
# Bootstrap the ROC-AUC and PR-AUC distributions of each of the four result
# frames; every call returns two lists (ROC-AUC values, PR-AUC values).
roc_auc_all_mortality,pr_auc_all_mortality  = bootstrapped_AUC(result_all_mortality)
roc_auc_ch_mortality,pr_auc_ch_mortality  = bootstrapped_AUC(result_ch_mortality)
roc_auc_all_LOS,pr_auc_all_LOS  = bootstrapped_AUC(result_all_LOS)
roc_auc_ch_LOS,pr_auc_ch_LOS  = bootstrapped_AUC(result_ch_LOS)

### Calculate Statistical Significance of the AUC Difference Between Mortality Models

In [None]:
# Bootstrapped AUC lists of the two mortality models, keyed by column name.
# NOTE(review): the variable name `dict` shadows the Python builtin for the
# rest of the kernel session; the next cell reads it by this name.
dict = {'roc_auc_all_mortality': roc_auc_all_mortality,
        'roc_auc_ch_mortality': roc_auc_ch_mortality,
        'pr_auc_all_mortality': pr_auc_all_mortality,
        'pr_auc_ch_mortality' : pr_auc_ch_mortality
       }

In [None]:
# One row per bootstrap iteration, one column per list collected in `dict`.
df_mortality = pd.DataFrame(dict)
df_mortality.head()

In [None]:
# Summary statistics of the bootstrapped values; the 2.5% and 97.5% rows are
# the bounds of a 95% percentile interval.
df_mortality.describe(percentiles=[0.025,0.975])

In [None]:
# Print "mean(2.5th percentile, 97.5th percentile)" of the bootstrap
# distribution for every column currently in df_mortality.
x = df_mortality.describe(percentiles=[0.025,0.975])
for i in df_mortality.columns:
    print(i+' 95%CI: {:.4f}({:.4f},{:.4f})'.format(x[i]['mean'], x[i]['2.5%'], x[i]['97.5%']))

In [None]:
# Overlay the bootstrapped ROC-AUC distributions of the chart-events and
# all-events mortality models (both calls draw onto the current axes).
import seaborn as sns
import matplotlib.pyplot as plt
ax = sns.distplot(roc_auc_ch_mortality)
ax = sns.distplot(roc_auc_all_mortality)

In [None]:
# Overlay the bootstrapped PR-AUC distributions of the chart-events and
# all-events mortality models (both calls draw onto the current axes).
import seaborn as sns
import matplotlib.pyplot as plt
ax = sns.distplot(pr_auc_ch_mortality)
ax = sns.distplot(pr_auc_all_mortality)

In [None]:
# Per-iteration difference (all-events minus chart-events) in ROC-AUC and
# PR-AUC. Both columns are added to df_mortality in place, so any cell that
# loops over df_mortality.columns afterwards also sees them.
# The bare expression on the next line has no effect (it is not the last
# statement of the cell, so nothing is displayed).
df_mortality
df_mortality['roc_auc_diff_mortality'] = df_mortality['roc_auc_all_mortality']-df_mortality['roc_auc_ch_mortality']
df_mortality['pr_auc_diff_mortality'] = df_mortality['pr_auc_all_mortality']-df_mortality['pr_auc_ch_mortality']

In [None]:
# Same summary as before, now including the two difference columns; their
# 2.5% / 97.5% rows bound the 95% percentile interval of the AUC difference.
df_mortality.describe(percentiles=[0.025,0.975])

### Calculate Statistical Significance of the AUC Difference Between LOS Models

In [None]:
# Bootstrapped AUC lists of the two length-of-stay models, keyed by column name.
# NOTE(review): the variable name `dict` shadows the Python builtin for the
# rest of the kernel session; the next cell reads it by this name.
dict = {'roc_auc_all_LOS': roc_auc_all_LOS,
        'roc_auc_ch_LOS': roc_auc_ch_LOS,
        'pr_auc_all_LOS': pr_auc_all_LOS,
        'pr_auc_ch_LOS' : pr_auc_ch_LOS
       }

In [None]:
# One row per bootstrap iteration, one column per list collected in `dict`.
df_LOS = pd.DataFrame(dict)
df_LOS.head()

In [None]:
# Summary statistics of the bootstrapped values; the 2.5% and 97.5% rows are
# the bounds of a 95% percentile interval.
df_LOS.describe(percentiles=[0.025,0.975])

In [None]:
# Keep the summary table in `x` for the printing cell that follows
# (this rebinds the `x` used earlier for the mortality summary).
x = df_LOS.describe(percentiles=[0.025,0.975])

In [None]:
# Print "mean(2.5th percentile, 97.5th percentile)" of the bootstrap
# distribution for every column of df_LOS, reading the summary stored in `x`.
for i in df_LOS.columns:
    print(i+' 95%CI: {:.4f}({:.4f},{:.4f})'.format(x[i]['mean'], x[i]['2.5%'], x[i]['97.5%']))


In [None]:
# Overlay the bootstrapped ROC-AUC distributions of the chart-events and
# all-events LOS models (both calls draw onto the current axes).
import seaborn as sns
import matplotlib.pyplot as plt
ax = sns.distplot(roc_auc_ch_LOS)
ax = sns.distplot(roc_auc_all_LOS)

In [None]:
# Overlay the bootstrapped PR-AUC distributions of the chart-events and
# all-events LOS models (both calls draw onto the current axes).
import seaborn as sns
import matplotlib.pyplot as plt
ay = sns.distplot(pr_auc_ch_LOS)
ay = sns.distplot(pr_auc_all_LOS)

In [None]:
# Per-iteration difference (all-events minus chart-events) in ROC-AUC and
# PR-AUC, added to df_LOS in place. Note the ROC column is named
# 'auc_diff_LOS', unlike the mortality frame's 'roc_auc_diff_mortality'.
df_LOS['auc_diff_LOS'] = df_LOS['roc_auc_all_LOS']-df_LOS['roc_auc_ch_LOS']
df_LOS['pr_auc_diff_LOS'] = df_LOS['pr_auc_all_LOS']-df_LOS['pr_auc_ch_LOS']

In [None]:
# Same summary as before, now including the two difference columns; their
# 2.5% / 97.5% rows bound the 95% percentile interval of the AUC difference.
df_LOS.describe(percentiles=[0.025,0.975])

### Bootstrap Hypothesis Testing: Mortality

In [None]:
# Pool the result rows of both mortality models into one frame; the bootstrap
# loop below resamples from this pooled frame.
combined = pd.concat([result_all_mortality,result_ch_mortality])

In [None]:
# Accumulators for the bootstrap hypothesis test. The loop cell that follows
# appends to dif_bootstrap_auc / dif_bootstrap_pr.
# NOTE(review): perms_all_mortality and perms_ch_mortality are not referenced
# again in the visible notebook, and the two *_pr names are rebound to scalar
# values inside that loop.
perms_all_mortality = []
perms_ch_mortality = []
perms_all_mortality_pr = []
perms_ch_mortality_pr = []
dif_bootstrap_auc = []
dif_bootstrap_pr = []

In [None]:
%%time
# Start from empty lists on every run. Without this, re-executing the cell
# kept appending to the results of the previous run while the p-value cells
# still assumed exactly 10000 entries.
dif_bootstrap_auc = []
dif_bootstrap_pr = []
for i in range(10000):
    # resample() is called without random_state, so it draws from NumPy's
    # global RNG; seeding it per iteration makes every draw reproducible.
    np.random.seed(i)
    # Draw two groups from the pooled results, sized like the two models'
    # result frames, and record the difference in each metric between them.
    perms_all_mortality_roc, perms_all_mortality_pr = calculate_roc_auc(resample(combined, n_samples = len(result_all_mortality)))
    perms_ch_mortality_roc, perms_ch_mortality_pr  = calculate_roc_auc(resample(combined, n_samples = len(result_ch_mortality)))
    dif_bootstrap_auc.append(perms_all_mortality_roc-perms_ch_mortality_roc)
    dif_bootstrap_pr.append(perms_all_mortality_pr-perms_ch_mortality_pr)


In [None]:
# Distribution of AUROC differences obtained from the pooled bootstrap.
fig = plt.figure(figsize=(10,3))
ax = sns.distplot(dif_bootstrap_auc)

plt.xlabel('Difference in AUROC')
plt.ylabel('Frequency')
plt.title('Bootstrapped Population (Combined data)')
plt.show()

# Observed Difference
obs_difs = (calculate_roc_auc(result_all_mortality)[0] - calculate_roc_auc(result_ch_mortality)[0])
print('observed difference in AUROC: {}'.format(obs_difs))
# One-sided p-value: share of bootstrap differences at least as large as the
# observed one. Converting to an array makes the element-wise comparison
# explicit, and taking the mean divides by the actual number of draws.
p_value = np.mean(np.asarray(dif_bootstrap_auc) >= obs_difs)
print('p-value: {}'.format(p_value))

In [None]:
# Distribution of PR-AUC differences obtained from the pooled bootstrap.
fig = plt.figure(figsize=(10,3))
ax = sns.distplot(dif_bootstrap_pr)

plt.xlabel('Difference in PRAUC')
plt.ylabel('Frequency')
plt.title('Bootstrapped Population (Combined data)')
plt.show()

# Observed Difference (index 1 of calculate_roc_auc's result is the PR-AUC)
obs_difs = (calculate_roc_auc(result_all_mortality)[1] - calculate_roc_auc(result_ch_mortality)[1])
print('observed difference in PRAUC: {}'.format(obs_difs))
# One-sided p-value: share of bootstrap differences at least as large as the
# observed one. Converting to an array makes the element-wise comparison
# explicit, and taking the mean divides by the actual number of draws.
p_value = np.mean(np.asarray(dif_bootstrap_pr) >= obs_difs)
print('p-value: {}'.format(p_value))

In [None]:
# AUROC bootstrap distribution with the observed AUROC difference marked.
# The observed value is recomputed here: `obs_difs` was last assigned the
# PR-AUC difference by the preceding cell, so reusing it would draw the wrong
# reference line on this AUROC plot.
obs_dif_auc = (calculate_roc_auc(result_all_mortality)[0] - calculate_roc_auc(result_ch_mortality)[0])

fig = plt.figure(figsize=(10,3))
ax = sns.distplot(dif_bootstrap_auc)

plt.xlabel('Difference in AUROC')
plt.ylabel('Frequency')
plt.axvline(obs_dif_auc, color='r')
plt.show()

### Bootstrap Hypothesis Testing: LOS

In [None]:
# Pool the result rows of both LOS models into one frame (rebinding the
# `combined` used for mortality); the bootstrap loop below resamples from it.
combined = pd.concat([result_all_LOS,result_ch_LOS])

In [None]:
# Accumulators for the LOS bootstrap hypothesis test. The loop cell that
# follows appends to dif_bootstrap_auc / dif_bootstrap_pr, which are reset
# here and therefore no longer hold the mortality results.
# NOTE(review): perms_all_LOS and perms_ch_LOS are not referenced again in the
# visible notebook, and the two *_pr names are rebound to scalar values inside
# that loop.
perms_all_LOS = []
perms_ch_LOS = []
perms_all_LOS_pr = []
perms_ch_LOS_pr = []
dif_bootstrap_auc = []
dif_bootstrap_pr = []

In [None]:
%%time
# Start from empty lists on every run. Without this, re-executing the cell
# kept appending to the results of the previous run while the p-value cells
# still assumed exactly 10000 entries.
dif_bootstrap_auc = []
dif_bootstrap_pr = []
for i in range(10000):
    # resample() is called without random_state, so it draws from NumPy's
    # global RNG; seeding it per iteration makes every draw reproducible.
    np.random.seed(i)
    # Draw two groups from the pooled results, sized like the two models'
    # result frames, and record the difference in each metric between them.
    perms_all_LOS_roc, perms_all_LOS_pr = calculate_roc_auc(resample(combined, n_samples = len(result_all_LOS)))
    perms_ch_LOS_roc, perms_ch_LOS_pr  = calculate_roc_auc(resample(combined, n_samples = len(result_ch_LOS)))
    dif_bootstrap_auc.append(perms_all_LOS_roc-perms_ch_LOS_roc)
    dif_bootstrap_pr.append(perms_all_LOS_pr-perms_ch_LOS_pr)

    


In [None]:
# Distribution of AUROC differences obtained from the pooled bootstrap.
fig = plt.figure(figsize=(10,3))
ax = sns.distplot(dif_bootstrap_auc)

plt.xlabel('Difference in AUROC')
plt.ylabel('Frequency')
plt.title('Bootstrapped Population (Combined data)')
plt.show()

# Observed Difference
obs_difs = (calculate_roc_auc(result_all_LOS)[0] - calculate_roc_auc(result_ch_LOS)[0])
print('observed difference in AUROC: {}'.format(obs_difs))
# One-sided p-value: share of bootstrap differences at least as large as the
# observed one. Converting to an array makes the element-wise comparison
# explicit, and taking the mean divides by the actual number of draws.
p_value = np.mean(np.asarray(dif_bootstrap_auc) >= obs_difs)
print('p-value: {}'.format(p_value))

In [None]:
# Distribution of PR-AUC differences obtained from the pooled bootstrap.
fig = plt.figure(figsize=(10,3))
ax = sns.distplot(dif_bootstrap_pr)

plt.xlabel('Difference in PRAUC')
plt.ylabel('Frequency')
plt.title('Bootstrapped Population (Combined data)')
plt.show()

# Observed Difference (index 1 of calculate_roc_auc's result is the PR-AUC)
obs_difs = (calculate_roc_auc(result_all_LOS)[1] - calculate_roc_auc(result_ch_LOS)[1])
print('observed difference in PRAUC: {}'.format(obs_difs))
# One-sided p-value: share of bootstrap differences at least as large as the
# observed one. Converting to an array makes the element-wise comparison
# explicit, and taking the mean divides by the actual number of draws.
p_value = np.mean(np.asarray(dif_bootstrap_pr) >= obs_difs)
print('p-value: {}'.format(p_value))

In [None]:
# AUROC bootstrap distribution with the observed AUROC difference marked.
# The observed value is recomputed here: `obs_difs` was last assigned the
# PR-AUC difference by the preceding cell, so reusing it would draw the wrong
# reference line on this AUROC plot.
obs_dif_auc = (calculate_roc_auc(result_all_LOS)[0] - calculate_roc_auc(result_ch_LOS)[0])

fig = plt.figure(figsize=(10,3))
ax = sns.distplot(dif_bootstrap_auc)

plt.xlabel('Difference in AUROC')
plt.ylabel('Frequency')
plt.axvline(obs_dif_auc, color='r')
plt.show()