# **Disease-Specific** Out-of-Distribution (External) Evaluation

In [None]:
import joblib
import numpy as np
from mimic_pipeline.metric import get_calibration_curve, get_model_size
from sklearn.metrics import (auc, brier_score_loss, precision_recall_curve,
                             roc_curve)
from typing import *
import pandas as pd
import mimic_pipeline.utils as utils

# Ask for the database credentials interactively instead of hard-coding them.
import getpass

user = input("Enter your username: ")
# getpass does not echo what is typed, so the password does not end up in the
# terminal or in the saved cell output the way it does with input().
password = getpass.getpass("Enter your password: ")
loader = utils.DataBaseLoader(user=user, password=password, dbname='eicu', schema='eicu')
# Looked up once and kept at module level; ood_evaluate merges its
# 'sofa_emp_prob' column into the cohort when scoring the SOFA baseline.
SOFA = loader['sofa']

# Columns that already contain a predicted probability for a severity score.
# They are evaluated directly; no trained model is loaded for them.
_BASELINE_SCORES = ('sapsii_prob', 'oasis_prob', 'apache_iv_prob', 'apache_iva_prob', 'sofa_emp_prob')


def ood_evaluate(disease, model: str, how: str, save: bool=False) -> Optional[dict]:
    """Evaluate one model or baseline score on a disease-specific eICU cohort.

    The cohort is read from ``data/eicu-disease/{disease}-union-features-id.csv``
    and the label is its ``hospital_expire_flag`` column.

    Args:
        disease: Cohort name; only used to build file paths.
        model: Either one of the baseline score columns in ``_BASELINE_SCORES``
            or the file name of a saved FasterRisk model (must contain
            ``'fasterrisk'``).
        how: Where to load a FasterRisk model and its binarizer from:
            ``'all'`` -> ``models/fasterrisk/``,
            ``'disease'`` -> ``models/disease/{disease}/``.
            Not used for baseline scores.
        save: When True, dump the statistics to
            ``results/disease/{disease}/{model}-ood-stats`` and return None.
            When False, return the statistics instead.

    Returns:
        A dict with the AUROC/AUPRC values, the ROC and precision-recall
        curves, calibration curve and statistics, Brier score, SMR and model
        complexity -- or None when ``save`` is True.

    Raises:
        ValueError: If ``model`` is neither a baseline score nor a FasterRisk
            model, or if a FasterRisk model is requested with an unknown
            ``how``. (Previously these cases surfaced later as a NameError on
            an unbound local.)
    """
    print(f"Evaluating OOD performance for {model}...")

    is_baseline = model in _BASELINE_SCORES
    trained_model = None
    model_dir = None
    if not is_baseline:
        # Fail fast, before touching any file, on inputs that could never
        # produce predictions.
        if 'fasterrisk' not in model:
            raise ValueError(
                f"Unsupported model {model!r}: expected a FasterRisk model or one of {_BASELINE_SCORES}"
            )
        if how == 'all':
            # NOTE: models trained on all patients, to see how they perform on this cohort
            model_dir = "models/fasterrisk"
        elif how == 'disease':
            model_dir = f"models/disease/{disease}"
        else:
            raise ValueError(f"Unsupported how {how!r}: expected 'all' or 'disease'")
        trained_model = joblib.load(f"{model_dir}/{model}")

    eicu_df = pd.read_csv(f"data/eicu-disease/{disease}-union-features-id.csv")
    y_test = eicu_df['hospital_expire_flag']

    if is_baseline:
        if model == 'sofa_emp_prob':
            # The SOFA probability is not stored in the cohort CSV, so pull it
            # from the module-level SOFA table.
            # NOTE(review): assumes SOFA has exactly one row per
            # (uniquepid, patientunitstayid); duplicates would add rows and
            # misalign y_prob with y_test -- confirm.
            eicu_df = eicu_df.merge(SOFA[['uniquepid', 'patientunitstayid', 'sofa_emp_prob']], on=['uniquepid', 'patientunitstayid'], how='left')
        y_prob = eicu_df[model]
    else:
        # Features = everything except identifiers, the label and the baseline scores.
        X_test = eicu_df.drop(
            ['uniquepid', 'patientunitstayid', 'hospital_expire_flag', 'apache_iv_prob', 'apache_iva_prob', 'oasis_prob', 'sapsii_prob'],
            axis=1,
        )
        # The binarizer is saved next to the model it was fitted with.
        binarizer = joblib.load(f"{model_dir}/{model}-binarizer")
        X_test, _ = binarizer.transform(X_test)
        y_prob = trained_model.predict_proba(X_test.to_numpy())

    # Discrimination.
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    auroc = auc(fpr, tpr)
    precision, recall, _ = precision_recall_curve(y_test, y_prob)
    auprc = auc(recall, precision)

    # Calibration: once with the helper's default binning strategy (reported
    # as the H statistic) and once with quantile bins (reported as the C statistic).
    prob_true, prob_pred, h_stat, p1 = get_calibration_curve(y_test, y_prob)
    _, _, c_stat, p2 = get_calibration_curve(y_test, y_prob, strategy='quantile')
    brier = brier_score_loss(y_test, y_prob)

    # SMR = observed deaths / expected deaths. Labels equal to -1 are mapped
    # to 0 first so that they do not subtract from the observed count.
    smr = np.sum(y_test.replace({-1: 0})) / np.sum(y_prob)

    # Baseline scores have no model object to measure.
    complexity = 'NA' if is_baseline else get_model_size(trained_model)

    print(
        f"AUROC: {auroc}\nAUPRC: {auprc}\n"
        f"Hosmer Lemeshow H stat: {h_stat}, P-Value: {p1}\n"
        f"Hosmer Lemeshow C stat: {c_stat}, P-Value: {p2}\n"
        f"Brier Score: {brier}\nSMR: {smr}\nComplexity: {complexity}\n"
    )

    stats = {
        "auroc": auroc, "auprc": auprc, "precision": precision, "recall": recall, "fpr": fpr, "tpr": tpr,
        "true_prob": prob_true, "pred_prob": prob_pred, "h-stat": h_stat, "h-p-value": p1, "c-stat": c_stat,
        "c-p-value": p2, "brier": brier, "smr": smr, 'complexity': complexity,
    }

    if save:
        joblib.dump(stats, f"results/disease/{disease}/{model}-ood-stats")
        return None
    return stats

## SEPSIS

### FasterRisk (trained on all patients in MIMIC)

In [None]:
# Score each saved FasterRisk model (one per group-sparsity level, loaded from
# the models trained on all patients) on the sepsis cohort and save the stats.
sepsis_sparsity_levels = [10, 14, 15, 16, 17, 18, 19, 20, 25, 30, 35, 40, 45]
for group_sparsity in sepsis_sparsity_levels:
    ood_evaluate(
        disease='sepsis',
        model=f'fasterrisk-{group_sparsity}',
        how='all',
        save=True,
    )

### Baselines

In [None]:
# Score each baseline severity-score probability column on the sepsis cohort
# and save the stats.
sepsis_baseline_columns = [
    'apache_iv_prob',
    'apache_iva_prob',
    'oasis_prob',
    'sapsii_prob',
    'sofa_emp_prob',
]
for scores in sepsis_baseline_columns:
    ood_evaluate(disease='sepsis', model=scores, how='all', save=True)

## AMI

### FasterRisk (trained on all MIMIC patients)

In [None]:
# Score each saved FasterRisk model (one per group-sparsity level, loaded from
# the models trained on all patients) on the AMI cohort and save the stats.
ami_sparsity_levels = [10, 14, 15, 16, 17, 18, 19, 20, 25, 30, 35, 40, 45]
for group_sparsity in ami_sparsity_levels:
    ood_evaluate(
        disease='ami',
        model=f'fasterrisk-{group_sparsity}',
        how='all',
        save=True,
    )

### Baselines

In [None]:
# Score each baseline severity-score probability column on the AMI cohort and
# save the stats.
ami_baseline_columns = [
    'apache_iv_prob',
    'apache_iva_prob',
    'oasis_prob',
    'sapsii_prob',
    'sofa_emp_prob',
]
for scores in ami_baseline_columns:
    ood_evaluate(disease='ami', model=scores, how='all', save=True)

## Heart Failure

### FasterRisk (trained on all MIMIC patients)

In [None]:
# Score each saved FasterRisk model (one per group-sparsity level, loaded from
# the models trained on all patients) on the heart-failure cohort and save the
# stats.
heart_failure_sparsity_levels = [10, 14, 15, 16, 17, 18, 19, 20, 25, 30, 35, 40, 45]
for group_sparsity in heart_failure_sparsity_levels:
    ood_evaluate(
        disease='heart_failure',
        model=f'fasterrisk-{group_sparsity}',
        how='all',
        save=True,
    )

### Baselines

In [None]:
# Score each baseline severity-score probability column on the heart-failure
# cohort and save the stats.
heart_failure_baseline_columns = [
    'apache_iv_prob',
    'apache_iva_prob',
    'oasis_prob',
    'sapsii_prob',
    'sofa_emp_prob',
]
for scores in heart_failure_baseline_columns:
    ood_evaluate(disease='heart_failure', model=scores, how='all', save=True)

## AKF

### FasterRisk (trained on all patients in MIMIC)

In [None]:
# Score each saved FasterRisk model (one per group-sparsity level, loaded from
# the models trained on all patients) on the AKF cohort and save the stats.
akf_sparsity_levels = [10, 14, 15, 16, 17, 18, 19, 20, 25, 30, 35, 40, 45]
for group_sparsity in akf_sparsity_levels:
    ood_evaluate(
        disease='akf',
        model=f'fasterrisk-{group_sparsity}',
        how='all',
        save=True,
    )

### Baselines

In [None]:
# Score each baseline severity-score probability column on the AKF cohort and
# save the stats.
akf_baseline_columns = [
    'apache_iv_prob',
    'apache_iva_prob',
    'oasis_prob',
    'sapsii_prob',
    'sofa_emp_prob',
]
for scores in akf_baseline_columns:
    ood_evaluate(disease='akf', model=scores, how='all', save=True)