# Calibrate Models

## Import Packages/Libraries + set globals

In [None]:
import sys

sys.path.append("../")
from src.data_utils import get_data, get_models
from src.nn_models import load_nn_clf

from sklearn.calibration import CalibratedClassifierCV
from sklearn.frozen import FrozenEstimator
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
from src.config import BASE_PATH, DEVICE, SEED
import joblib as jb

Set Globals

In [None]:
# Data: one dataset bundle per outcome, keyed by the outcome's short name.
OUTCOME_DICT = {
    short_name: get_data(dataset_key)
    for short_name, dataset_key in (
        ("surg", "outcome_surg"),
        ("bleed", "outcome_bleed"),
        ("asp", "outcome_asp"),
        ("mort", "outcome_mort"),
    )
}

# Models: the prefixes below are loaded via get_models; the neural net is
# loaded separately from its own checkpoint file.
model_prefix_list = ["lr", "lgbm", "svc", "stack"]
# The network's input width is the feature count. Per the original note, every
# outcome's X frame has the same number of features, so "surg" is an arbitrary
# choice here.
nn_in_dim = OUTCOME_DICT["surg"]["X_train"].shape[1]

MODEL_DICT = {}
for outcome in OUTCOME_DICT:
    # Despite the name, this is the path to the checkpoint file itself.
    nn_dir = BASE_PATH / "models" / outcome / "nn.pt"
    outcome_models = get_models(model_prefix_list, outcome)
    outcome_models["nn"] = load_nn_clf(
        data_path=nn_dir, in_dim=nn_in_dim, device=DEVICE
    )
    MODEL_DICT[outcome] = outcome_models

## Calibrate Models

In [None]:
# For each outcome: calibrate every already-fitted model on that outcome's
# validation split, save the calibrated wrapper under
# BASE_PATH / "cal_models" / <outcome> / <model>.joblib, and print a quick
# train/validation AUROC check.
for outcome_name, model_dict in MODEL_DICT.items():
    # "mort" uses one fold fewer than the other outcomes.
    # NOTE(review): presumably because it has fewer positive cases -- confirm.
    n_splits = 4 if outcome_name == "mort" else 5
    print(f"Working on outcome: {outcome_name}...")

    # The data splits and the CV splitter depend only on the outcome, so they
    # are prepared once here rather than once per model.
    outcome_data = OUTCOME_DICT[outcome_name]
    X_val = outcome_data["X_val"]
    y_val = outcome_data["y_val"].values.ravel()
    X_train = outcome_data["X_train"]
    # Flattened the same way as y_val so both AUROC calls receive 1-D labels.
    y_train = outcome_data["y_train"].values.ravel()
    # An integer random_state makes every split() call reproduce the same
    # folds, so one splitter can be shared by all models of this outcome.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

    # For each model
    for model_name, model in model_dict.items():
        print(f"Model: \t\t{model_name}")

        ### Fit Calibrated Classifier on validation set ###
        # Frozen estimator -- won't refit; only the calibrator is learned.
        calibrated_model = CalibratedClassifierCV(FrozenEstimator(model), cv=skf)
        calibrated_model.fit(X_val, y_val)

        ### Export model ###
        export_path = BASE_PATH / "cal_models" / outcome_name / f"{model_name}.joblib"
        export_path.parent.mkdir(exist_ok=True, parents=True)
        # Drop any stale artifact from a previous run before writing.
        export_path.unlink(missing_ok=True)
        jb.dump(calibrated_model, export_path)

        ### Get prelim results ###
        # Val -- the calibrator was fitted on this same split, so treat this
        # score as a sanity check only.
        val_proba = calibrated_model.predict_proba(X_val)[:, 1]  # type: ignore
        val_score = roc_auc_score(y_val, val_proba)

        # Train
        train_proba = calibrated_model.predict_proba(X_train)[:, 1]  # type: ignore
        train_score = roc_auc_score(y_train, train_proba)

        print(f"Train AUROC: \t{train_score:.3f}")
        print(f"Val AUROC: \t{val_score:.3f}")
        print("*" * 20)
    print("-" * 50)