In [1]:
import pickle
import pandas as pd
import numpy as np
import scipy
import joblib

In [2]:
# Load the saved models keyed by model name.
# NOTE(security): joblib.load unpickles arbitrary Python objects, so only load
# 'models_dict.joblib' when it comes from a trusted source.
models_dict = joblib.load('models_dict.joblib')
# Show which model names are available in the loaded mapping.
models_dict.keys()

dict_keys(['Baseline', 'M-CURES'])

In [3]:
# Read the evaluation cohort; the columns used below are 'ID', 'hosp_id',
# 'window_id' and 'y'.
df_cohort = pd.read_csv('cohort.csv')

# Per-row identifiers and labels, kept in the cohort's row order.
test_hosp = df_cohort['hosp_id']
test_window = df_cohort['window_id']
test_y = df_cohort['y']

# Column-less frame indexed by 'ID'; joining a feature table onto it yields
# that table's rows arranged in cohort order.
cohort_IDs = df_cohort.set_index('ID')[[]]

In [4]:
# Display the cohort table loaded from 'cohort.csv' as this cell's output.
df_cohort

Unnamed: 0,ID,hosp_id,window_id,y
0,90000-____0,90000,0,1
1,90000-____1,90000,1,1
2,90000-____2,90000,2,1
3,90001-____0,90001,0,0
4,90001-____1,90001,1,0


In [5]:
from sklearn import metrics, utils
from joblib import Parallel, delayed

def bootstrap_func(i, y_true, y_score):
    """Evaluate one bootstrap replicate of the ROC curve and its AUROC.

    Parameters
    ----------
    i : int
        Seed passed as ``random_state`` to the resampler, which makes
        replicate ``i`` reproducible.
    y_true, y_score
        Labels and scores; they are resampled together (with replacement) so
        each drawn label stays paired with its score.

    Returns
    -------
    tuple
        ``(metrics.roc_curve(...), metrics.roc_auc_score(...))`` computed on
        the resampled pair.
    """
    labels_b, scores_b = utils.resample(y_true, y_score, replace=True, random_state=i)
    curve_b = metrics.roc_curve(labels_b, scores_b)
    auroc_b = metrics.roc_auc_score(labels_b, scores_b)
    return curve_b, auroc_b

def get_roc_CI(y_true, y_score, n_bootstraps=1000, n_jobs=4):
    """Bootstrap a 95% interval for the AUROC and a pointwise band for the ROC curve.

    ``bootstrap_func`` is run for seeds ``0 .. n_bootstraps - 1`` in parallel.
    The 2.5th and 97.5th percentiles of the replicate AUROCs are printed, and
    every replicate ROC curve is interpolated onto a shared 100-point FPR grid
    to build a band of mean TPR +/- 1.96 standard deviations, clipped to [0, 1].

    Parameters
    ----------
    y_true, y_score
        Labels and scores, forwarded unchanged to ``bootstrap_func``.
    n_bootstraps : int, default 1000
        Number of bootstrap replicates (also the number of seeds used).
    n_jobs : int, default 4
        Worker count handed to ``joblib.Parallel``.

    Returns
    -------
    roc_curves : tuple
        One ``metrics.roc_curve`` result per replicate.
    auc_scores : tuple
        One AUROC per replicate.
    mean_fpr : numpy.ndarray
        The shared FPR grid, ``np.linspace(0, 1, 100)``.
    tprs_lower, tprs_upper : numpy.ndarray
        Lower and upper TPR band evaluated on ``mean_fpr``.

    Raises
    ------
    ValueError
        If ``n_bootstraps`` is less than 1.
    Exception
        Anything raised inside ``bootstrap_func`` propagates to the caller
        (presumably including the error scikit-learn raises when a resample
        contains a single class -- confirm against the installed version).
    """
    if n_bootstraps < 1:
        raise ValueError('n_bootstraps must be at least 1, got {}'.format(n_bootstraps))

    replicates = Parallel(n_jobs=n_jobs)(
        delayed(bootstrap_func)(seed, y_true, y_score) for seed in range(n_bootstraps)
    )
    roc_curves, auc_scores = zip(*replicates)
    print('Test AUC: ({:.3f}, {:.3f}) percentile 95% CI'.format(np.percentile(auc_scores, 2.5), np.percentile(auc_scores, 97.5)))

    # Replicate curves have different FPR breakpoints; put them on one grid so
    # they can be averaged pointwise.
    mean_fpr = np.linspace(0, 1, 100)
    tprs = []
    for fpr, tpr, _ in roc_curves:
        tpr_on_grid = np.interp(mean_fpr, fpr, tpr)
        tpr_on_grid[0] = 0.0  # pin the curve to the origin at FPR = 0
        tprs.append(tpr_on_grid)

    mean_tpr = np.mean(tprs, axis=0)
    std_tpr = np.std(tprs, axis=0)
    # Normal-approximation band, clipped to the valid TPR range.
    tprs_upper = np.minimum(mean_tpr + 1.96 * std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - 1.96 * std_tpr, 0)
    return roc_curves, auc_scores, mean_fpr, tprs_lower, tprs_upper

def eval3():
    """Collapse window-level rows to one row per hospitalization.

    Reads the notebook-level ``df_Yte_all`` (it must be assigned before this
    is called), discards rows whose ``window_id`` is below 1, and takes the
    column-wise maximum within each ``hosp_id``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``hosp_id``; each remaining column holds that group's maximum.
    """
    window_rows = df_Yte_all.copy()
    is_later_window = window_rows['window_id'] >= 1
    per_hosp_max = window_rows[is_later_window].groupby(['hosp_id']).max()
    return per_hosp_max

## Baseline model

In [6]:
# Classifiers stored under the 'Baseline' key of the loaded models mapping.
baseline_clfs = models_dict['Baseline']

# Baseline feature table, indexed by ID.
df_baseline = (
    pd.read_csv('../preprocessing/sample_output/baseline.csv')
    .set_index('ID')
)

In [7]:
# Display the baseline feature table (indexed by ID) as this cell's output.
df_baseline

Unnamed: 0_level_0,"age_value_(17.999, 47.0]","age_value_(47.0, 59.0]","age_value_(59.0, 67.0]","age_value_(67.0, 76.0]","age_value_(76.0, 102.0]",respiratoryrate_mask_0.0,respiratoryrate_mask_1.0,"respiratoryrate_max_(-0.001, 16.0]","respiratoryrate_max_(16.0, 18.0]","respiratoryrate_max_(18.0, 22.0]","respiratoryrate_max_(22.0, 200.0]","respiratoryrate_mean_(-0.001, 16.0]","respiratoryrate_mean_(16.0, 17.667]","respiratoryrate_mean_(17.667, 18.0]","respiratoryrate_mean_(18.0, 20.0]","respiratoryrate_mean_(20.0, 196.0]","respiratoryrate_min_(-0.001, 16.0]","respiratoryrate_min_(16.0, 18.0]","respiratoryrate_min_(18.0, 19.0]","respiratoryrate_min_(19.0, 196.0]","respiratoryrate_value_(-0.001, 16.0]","respiratoryrate_value_(16.0, 18.0]","respiratoryrate_value_(18.0, 20.0]","respiratoryrate_value_(20.0, 196.0]",spo2_mask_0.0,spo2_mask_1.0,"spo2_max_(-0.001, 94.0]","spo2_max_(94.0, 96.0]","spo2_max_(96.0, 97.0]","spo2_max_(97.0, 99.0]","spo2_max_(99.0, 100.0]","spo2_mean_(-0.001, 94.0]","spo2_mean_(94.0, 95.0]","spo2_mean_(95.0, 96.667]","spo2_mean_(96.667, 98.0]","spo2_mean_(98.0, 100.0]","spo2_min_(-0.001, 93.0]","spo2_min_(93.0, 95.0]","spo2_min_(95.0, 96.0]","spo2_min_(96.0, 98.0]","spo2_min_(98.0, 100.0]","spo2_value_(-0.001, 94.0]","spo2_value_(94.0, 95.0]","spo2_value_(95.0, 97.0]","spo2_value_(97.0, 98.0]","spo2_value_(98.0, 100.0]","_307928_value_(-0.001, 2.0]","_307928_value_(2.0, 3.0]","_307928_value_(3.0, 5.0]","_307928_value_(5.0, 22525.0]"
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1
90000-____0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90000-____1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
90000-____2,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
90001-____0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90001-____1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0


In [8]:
# Arrange the baseline features in cohort order by joining them onto the
# column-less, ID-indexed `cohort_IDs` frame, then convert to sparse floats.
eval_matrix = scipy.sparse.csr_matrix(cohort_IDs.join(df_baseline).values.astype(float))
# Score every row with each classifier, keeping column 1 of predict_proba.
all_y = np.array([clf.predict_proba(eval_matrix)[:,1] for clf in baseline_clfs])

# Average the scores across classifiers.
y_scores = all_y.mean(0)
df_Yte_all = pd.DataFrame({'hosp_id': test_hosp, 'window_id': test_window, 'y': test_y, 'y_score': y_scores})
# eval3() reads the `df_Yte_all` just assigned above and returns the
# per-hospitalization maximum over rows with window_id >= 1.
df_Yte_agg = eval3()
y_score = df_Yte_agg['y_score']
# Align labels to the hospitalizations that actually received a score: one with
# only window 0 is dropped by eval3(), and would otherwise leave y_true longer
# than y_score.
y_true = df_Yte_all.groupby(['hosp_id']).first()['y'].loc[y_score.index]
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score)
print('Test AUC: {:.3f}'.format(metrics.roc_auc_score(y_true, y_score)))

# Generate 95% CI
# Best effort: the bootstrap can fail (presumably when a resample ends up with
# a single class on a tiny cohort). Report why instead of discarding the error
# silently, and let KeyboardInterrupt / SystemExit propagate.
try:
    roc_curves, auc_scores, mean_fpr, tprs_lower, tprs_upper = get_roc_CI(y_true, y_score)
except Exception as err:
    print('Bootstrap 95% CI unavailable: {}: {}'.format(type(err).__name__, err))

Test AUC: 1.000


## M-CURES (lite) model

In [9]:
# Classifiers stored under the 'M-CURES' key of the loaded models mapping.
mcures_clfs = models_dict['M-CURES']

# M-CURES feature table, indexed by ID.
df_mcures = (
    pd.read_csv('../preprocessing/sample_output/mcures.csv')
    .set_index('ID')
)

In [10]:
# Display the M-CURES feature table (indexed by ID) as this cell's output.
df_mcures

Unnamed: 0_level_0,"age_value_(17.999, 47.0]","age_value_(47.0, 59.0]","age_value_(59.0, 67.0]","age_value_(67.0, 76.0]","age_value_(76.0, 102.0]",respiratoryrate_mask_0.0,respiratoryrate_mask_1.0,"respiratoryrate_max_(-0.001, 16.0]","respiratoryrate_max_(16.0, 18.0]","respiratoryrate_max_(18.0, 22.0]","respiratoryrate_max_(22.0, 200.0]","respiratoryrate_mean_(-0.001, 16.0]","respiratoryrate_mean_(16.0, 17.667]","respiratoryrate_mean_(17.667, 18.0]","respiratoryrate_mean_(18.0, 20.0]","respiratoryrate_mean_(20.0, 196.0]","respiratoryrate_min_(-0.001, 16.0]","respiratoryrate_min_(16.0, 18.0]","respiratoryrate_min_(18.0, 19.0]","respiratoryrate_min_(19.0, 196.0]","respiratoryrate_value_(-0.001, 16.0]","respiratoryrate_value_(16.0, 18.0]","respiratoryrate_value_(18.0, 20.0]","respiratoryrate_value_(20.0, 196.0]",spo2_mask_0.0,spo2_mask_1.0,"spo2_max_(-0.001, 94.0]","spo2_max_(94.0, 96.0]","spo2_max_(96.0, 97.0]","spo2_max_(97.0, 99.0]","spo2_max_(99.0, 100.0]","spo2_mean_(-0.001, 94.0]","spo2_mean_(94.0, 95.0]","spo2_mean_(95.0, 96.667]","spo2_mean_(96.667, 98.0]","spo2_mean_(98.0, 100.0]","spo2_min_(-0.001, 93.0]","spo2_min_(93.0, 95.0]","spo2_min_(95.0, 96.0]","spo2_min_(96.0, 98.0]","spo2_min_(98.0, 100.0]","spo2_value_(-0.001, 94.0]","spo2_value_(94.0, 95.0]","spo2_value_(95.0, 97.0]","spo2_value_(97.0, 98.0]","spo2_value_(98.0, 100.0]",81723_hilonormal_flag_value_H,81723_hilonormal_flag_value_HH,81723_hilonormal_flag_value_L,81723_hilonormal_flag_value_LL,81723_hilonormal_flag_value_N,81723_value_value_ <6.80,81723_value_value_ SEE BELOW,"81723_value_value_(6.7989999999999995, 7.32]","81723_value_value_(7.32, 7.36]","81723_value_value_(7.36, 7.4]","81723_value_value_(7.4, 7.43]","81723_value_value_(7.43, 7.73]",81799_hilonormal_flag_value_H,81799_hilonormal_flag_value_L,81799_hilonormal_flag_value_LL,81799_hilonormal_flag_value_N,81799_value_value_ <15,81799_value_value_ SEE BELOW,81799_value_value_ not available,"81799_value_value_(12.399000000000001, 
23.0]","81799_value_value_(23.0, 26.0]","81799_value_value_(26.0, 30.0]","81799_value_value_(30.0, 36.0]","81799_value_value_(36.0, 59.0]","_307928_value_(-0.001, 2.0]","_307928_value_(2.0, 3.0]","_307928_value_(3.0, 5.0]","_307928_value_(5.0, 22525.0]",_313030_value_Continuous,_313030_value_Intermittent,_314689_value_Lying,_314689_value_Sitting,_314689_value_Standing,_355444_value_Developmentally supported position,_355444_value_HOB at 15 degrees,_355444_value_HOB at 30 degrees,_355444_value_HOB at 45 degrees,_355444_value_HOB at 60 degrees,_355444_value_HOB at 90 degrees,_355444_value_HOB flat (medical condition),_355444_value_Reverse Trendelenberg,_355444_value_other (see comments)
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1
90000-____0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90000-____1,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90000-____2,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90001-____0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90001-____1,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Arrange the M-CURES features in cohort order by joining them onto the
# column-less, ID-indexed `cohort_IDs` frame, then convert to sparse floats.
eval_matrix = scipy.sparse.csr_matrix(cohort_IDs.join(df_mcures).values.astype(float))
# Score every row with each classifier, keeping column 1 of predict_proba.
all_y = np.array([clf.predict_proba(eval_matrix)[:,1] for clf in mcures_clfs])

# Average the scores across classifiers.
y_scores = all_y.mean(0)
df_Yte_all = pd.DataFrame({'hosp_id': test_hosp, 'window_id': test_window, 'y': test_y, 'y_score': y_scores})
# eval3() reads the `df_Yte_all` just assigned above and returns the
# per-hospitalization maximum over rows with window_id >= 1.
df_Yte_agg = eval3()
y_score = df_Yte_agg['y_score']
# Align labels to the hospitalizations that actually received a score: one with
# only window 0 is dropped by eval3(), and would otherwise leave y_true longer
# than y_score.
y_true = df_Yte_all.groupby(['hosp_id']).first()['y'].loc[y_score.index]
fpr, tpr, thresholds = metrics.roc_curve(y_true, y_score)
print('Test AUC: {:.3f}'.format(metrics.roc_auc_score(y_true, y_score)))

# Generate 95% CI
# Best effort: the bootstrap can fail (presumably when a resample ends up with
# a single class on a tiny cohort). Report why instead of discarding the error
# silently, and let KeyboardInterrupt / SystemExit propagate.
try:
    roc_curves, auc_scores, mean_fpr, tprs_lower, tprs_upper = get_roc_CI(y_true, y_score)
except Exception as err:
    print('Bootstrap 95% CI unavailable: {}: {}'.format(type(err).__name__, err))

Test AUC: 1.000
