# Create Stack Model

## Set Up

Import packages

In [None]:
import sys
import warnings

# Put the parent directory on the import path so the `src` package imported
# below can be resolved (presumably the notebook runs from a subdirectory of
# the project root -- confirm). This must stay ahead of the `src` imports.
sys.path.append("../")
from src.data_utils import get_data, get_models
from src.config import SEED, BASE_PATH
from src.nn_model import load_nn_clf
import joblib
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score


# Echo the base path that the data and model directories are built from,
# as a quick sanity check before anything is loaded.
print(f"Path: {BASE_PATH}")

Load the processed data and the trained base models

In [None]:
# --- Data -------------------------------------------------------------
# One get_data() result per outcome, keyed by the outcome's short name.
file_dir = BASE_PATH / "data" / "processed"
OUTCOME_DICT = {
    name: get_data(f"{name}_outcome", file_dir)
    for name in ("med", "surg", "mort", "reop", "vte")
}
# Feature count read off the "surg" training matrix (same for all outcomes).
X_shape = OUTCOME_DICT["surg"]["X_train"].shape[1]

# --- Models -----------------------------------------------------------
# Per outcome: the base models returned by get_models(), plus the neural
# network loaded from that outcome's "nn.pt" and stored under the "nn" key.
model_dir = BASE_PATH / "models" / "trained"
model_prefix_list = ["lgbm", "lr", "xgb"]
MODEL_DICT = {}

for outcome in OUTCOME_DICT:
    MODEL_DICT[outcome] = get_models(model_prefix_list, outcome, model_dir)
    MODEL_DICT[outcome]["nn"] = load_nn_clf(
        data_path=model_dir / outcome / "nn.pt",
        in_dim=X_shape,
        device="cpu",
    )

# Presumably the number of cross-validation folds for the stacking step.
N_SPLITS = 5

## Build Model

In [None]:
# For every outcome: fit a stacking ensemble over that outcome's models,
# save it next to the other trained models, and print a quick AUROC check.
for outcome_name, model_dict in MODEL_DICT.items():
    print(f"Working on: {outcome_name}...")

    # Train split
    X_train = OUTCOME_DICT[outcome_name]["X_train"]
    y_train = OUTCOME_DICT[outcome_name]["y_train"]
    # Validation split
    X_val = OUTCOME_DICT[outcome_name]["X_val"]
    y_val = OUTCOME_DICT[outcome_name]["y_val"]

    # Fit stack
    # The fold count comes from the module-level N_SPLITS constant instead of
    # a hard-coded literal, so changing the constant actually takes effect.
    skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
    # StackingClassifier expects a list of (name, estimator) pairs.
    estimators = list(model_dict.items())
    stack_model = StackingClassifier(
        estimators=estimators,
        cv=skf,
        # The final estimator is trained on the raw features as well as the
        # base-model predictions.
        passthrough=True,
        final_estimator=LogisticRegression(random_state=SEED),
        n_jobs=-1,
    )
    # Flatten the target to 1-D, as sklearn expects for classification labels.
    stack_model.fit(X_train, y_train.values.ravel())

    # Export model
    model_export_path = model_dir / outcome_name / "stack.joblib"
    if model_export_path.exists():
        # Warn before replacing a previously exported stack model.
        warnings.warn(f"Over-writing models at path: {model_export_path}")
        model_export_path.unlink()
    joblib.dump(stack_model, model_export_path)

    # Preliminary results
    # Column 1 of predict_proba is the probability of the positive class.
    # NOTE: the train AUROC is in-sample (scored on the data the stack was
    # fitted on), so the validation AUROC is the more meaningful number.
    train_proba = stack_model.predict_proba(X_train)[:, 1]  # type: ignore
    val_proba = stack_model.predict_proba(X_val)[:, 1]  # type: ignore
    train_score = roc_auc_score(y_train, train_proba)
    val_score = roc_auc_score(y_val, val_proba)
    print(f"Train AUROC: \t{train_score:.3f}")
    print(f"Val AUROC: \t{val_score:.3f}")