# Out-of-Distribution (External) Evaluation

In [None]:

import joblib
import numpy as np
from mimic_pipeline.metric import get_calibration_curve, get_model_size
from sklearn.metrics import (auc, brier_score_loss, precision_recall_curve,
                             roc_curve)
from typing import *
import pandas as pd


def ood_evaluate(eicu_df, model: str, exp: str, save: bool=False) -> Optional[dict]:
    """Evaluate one model, or one pre-computed severity score, on ``eicu_df``.

    Args:
        eicu_df: DataFrame containing the feature columns, the label column
            ``hospital_expire_flag``, the id columns ``uniquepid`` and
            ``patientunitstayid``, and the four score columns ``sapsii_prob``,
            ``oasis_prob``, ``apache_iv_prob`` and ``apache_iva_prob``.
        model: Either the name of one of the four score columns (its values
            are then used directly as predicted probabilities), or the file
            name of a fitted model stored under ``models/{exp}/``.
        exp: Experiment name. Selects ``models/{exp}/`` for loading and
            ``results/{exp}/`` for saving; ``'oasis+'`` additionally restricts
            the features to the OASIS subset listed in the body.
        save: If true, dump the statistics with joblib and return ``None``;
            otherwise return the statistics.

    Returns:
        ``None`` when ``save`` is true. Otherwise a dict with the keys
        ``auroc``, ``auprc``, ``precision``, ``recall``, ``fpr``, ``tpr``,
        ``true_prob``, ``pred_prob``, ``h-stat``, ``h-p-value``, ``c-stat``,
        ``c-p-value``, ``brier``, ``smr`` and ``complexity``.
    """
    import os  # function-scope import: only needed to create the output directory

    # Score columns already present in eicu_df. They have no fitted estimator,
    # no preprocessing artefacts and no model size. Defined once so the
    # load / predict / complexity / save decisions cannot drift apart.
    baseline_scores = ('sapsii_prob', 'oasis_prob', 'apache_iv_prob', 'apache_iva_prob')
    is_baseline = model in baseline_scores

    print(f"Evaluating OOD performance for {model}...")
    # Always bind the name so every later branch is well-defined.
    trained_model = None if is_baseline else joblib.load(f"models/{exp}/{model}")

    non_feature_columns = [
        'uniquepid', 'patientunitstayid', 'hospital_expire_flag',
        'apache_iv_prob', 'apache_iva_prob', 'oasis_prob', 'sapsii_prob',
    ]
    X_test = eicu_df.drop(non_feature_columns, axis=1)
    y_test = eicu_df['hospital_expire_flag']

    if exp == 'oasis+' or model == 'fasterrisk-oasis':
        oasis_features = [
            'heartrate_min', 'heartrate_max', 'meanbp_min', 'meanbp_max', 'resprate_min', 'resprate_max', 'tempc_min',
            'tempc_max', 'urineoutput', 'mechvent', 'electivesurgery', 'age', 'gcs_min', 'preiculos'
        ]
        X_test = X_test[oasis_features]

    if 'fasterrisk' in model or model in ['nonlinear-logreg-l1', 'nonlinear-logreg-l2']:
        # These models are stored together with a binarizer that is applied
        # to the raw features before prediction.
        binarizer = joblib.load(f"models/{exp}/{model}-binarizer")
        X_test, _ = binarizer.transform(X_test)
        y_prob = trained_model.predict_proba(X_test.to_numpy())
    elif is_baseline:
        y_prob = eicu_df[model]
    else:
        # Remaining models are stored with an imputer and a scaler.
        imputer = joblib.load(f"models/{exp}/{model}-imputer")
        scaler = joblib.load(f"models/{exp}/{model}-scaler")
        columns = list(X_test.columns)
        X_test = imputer.transform(X_test)
        # Restore the column names before scaling.
        X_test = pd.DataFrame(X_test, columns=columns)
        X_test = scaler.transform(X_test)
        y_prob = trained_model.predict_proba(X_test)

    if len(y_prob.shape) == 2:        # for some scikit-learn models where probas is 2D
        y_prob = y_prob[:, 1]

    fpr, tpr, _ = roc_curve(y_test, y_prob)
    auroc = auc(fpr, tpr)
    precision, recall, _ = precision_recall_curve(y_test, y_prob)
    auprc = auc(recall, precision)
    # The default call feeds the "H" statistic and the quantile-strategy call
    # the "C" statistic, as labelled in the report printed below.
    prob_true, prob_pred, h_stat, p1 = get_calibration_curve(y_test, y_prob)
    _, _, c_stat, p2 = get_calibration_curve(y_test, y_prob, strategy='quantile')
    brier = brier_score_loss(y_test, y_prob)
    # Map -1 labels to 0 before summing, then divide by the summed predictions.
    # NOTE(review): presumably negatives may be encoded as -1 -- confirm.
    smr = np.sum(y_test.replace({-1: 0})) / np.sum(y_prob)
    complexity = 'NA' if is_baseline else get_model_size(trained_model)

    print(f"AUROC: {auroc}\nAUPRC: {auprc}\nHosmer Lemeshow H stat: {h_stat}, P-Value: {p1}\nHosmer Lemeshow C stat: {c_stat}, P-Value: {p2}\nBrier Score: {brier}\nSMR: {smr}\nComplexity: {complexity}\n")

    stats = {
        "auroc": auroc, "auprc": auprc, "precision": precision, "recall": recall, "fpr": fpr, "tpr": tpr,
        "true_prob": prob_true, "pred_prob": prob_pred, "h-stat": h_stat, "h-p-value": p1, "c-stat": c_stat,
        "c-p-value": p2, "brier": brier, "smr": smr, 'complexity': complexity,
    }

    if not save:
        return stats

    # Baseline scores do not belong to an experiment, so they are stored
    # directly under results/.
    if is_baseline:
        out_path = f"results/{model}-ood-stats"
    else:
        out_path = f"results/{exp}/{model}-ood-stats"
    # Create the target directory first so a missing folder does not discard
    # the finished evaluation.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    joblib.dump(stats, out_path)
    return None
    

## FasterRisk

In [None]:
# Load the eICU dataframe from CSV; the evaluation cells below pass it to
# ood_evaluate as `eicu_df`.
eicu_df = pd.read_csv('data/eICU-union.csv')

In [None]:
# Evaluate the models named fasterrisk-10, fasterrisk-15, ..., fasterrisk-45
# (range stops before 50). With save=True, ood_evaluate dumps each result to
# results/fasterrisk/{model}-ood-stats and returns None.
for group_sparsity in range(10, 50, 5):
    ood_evaluate(eicu_df, model=f'fasterrisk-{group_sparsity}', exp='fasterrisk', save=True)

In [None]:
# Evaluate the models named fasterrisk-14 through fasterrisk-19 and save each
# result under results/fasterrisk/. Note that fasterrisk-15 is also reached by
# the range(10, 50, 5) sweep in this notebook, so it is evaluated twice.
for i in range(14, 20):
    ood_evaluate(eicu_df, model=f'fasterrisk-{i}', exp='fasterrisk', save=True)

In [None]:
# The model name "fasterrisk-oasis" makes ood_evaluate restrict the features
# to its OASIS feature list before the model's binarizer is applied.
ood_evaluate(eicu_df, model="fasterrisk-oasis", exp='fasterrisk', save=True)

## Union 49 For Other ML Models

In [None]:
# Evaluate each listed model from models/union49/ and save its statistics to
# results/union49/{model}-ood-stats. exp='union49' does not trigger the OASIS
# feature restriction in ood_evaluate, so all remaining feature columns are used.
for model in ['ebm', 'nonlinear-logreg-l1', 'nonlinear-logreg-l2', 'random-forest', 'adaboost', 'xgboost']:
    ood_evaluate(eicu_df, model=model, exp='union49', save=True)

## OASIS+ Models

In [None]:
# Same model names as the union49 cell, but loaded from models/oasis+/.
# exp='oasis+' makes ood_evaluate keep only its OASIS feature list.
# Results are saved to results/oasis+/{model}-ood-stats.
for model in ['ebm', 'nonlinear-logreg-l1', 'nonlinear-logreg-l2', 'random-forest', 'adaboost', 'xgboost']:
    ood_evaluate(eicu_df, model=model, exp='oasis+', save=True)

## Severity of Illness Scores

In [None]:
# These names are score columns of eicu_df, not stored models: ood_evaluate
# uses the column values directly as predicted probabilities and saves to
# results/{model}-ood-stats. `exp` is required by the signature but is not
# used for loading or for the output path on this code path.
for model in ["oasis_prob", "sapsii_prob", "apache_iv_prob", "apache_iva_prob"]:
    ood_evaluate(eicu_df, model=model, exp='oasis+', save=True)