# Build Stacked Generalization Ensemble Model

Import packages/libraries

In [None]:
import sys
import warnings

sys.path.append("../")
from src.data_utils import get_data, get_models
from src.config import SEED, BASE_PATH, DEVICE
from src.nn_models import load_nn_clf

import joblib
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

## Import Data + Models

Set Globals

In [None]:
# Root folder of saved models; each outcome has its own sub-folder beneath it.
MODEL_DIR = BASE_PATH / "models"

# Data: one get_data() result per outcome, keyed by the outcome's short name.
OUTCOME_DICT = {
    "surg": get_data("outcome_surg"),
    "bleed": get_data("outcome_bleed"),
    "asp": get_data("outcome_asp"),
    "mort": get_data("outcome_mort"),
}

# Models: prefixes of the saved base learners requested from get_models().
model_prefix_list = ["lr", "lgbm", "svc"]
# The network's input width is the number of feature columns. It is read from
# the "surg" training frame on the assumption that every outcome shares the
# same feature set -- TODO confirm against the data pipeline.
nn_in_dim = OUTCOME_DICT["surg"]["X_train"].shape[1]

# outcome name -> {model name: estimator}; the neural net is added under "nn"
# next to whatever get_models() returned for that outcome.
MODEL_DICT = {}
for outcome in OUTCOME_DICT:
    outcome_models = MODEL_DICT[outcome] = get_models(model_prefix_list, outcome)
    # NOTE: despite its name, nn_dir is the path of the saved network *file*.
    nn_dir = MODEL_DIR / outcome / "nn.pt"
    outcome_models["nn"] = load_nn_clf(
        data_path=nn_dir,
        in_dim=nn_in_dim,
        device=DEVICE,
    )

## Build Model

In [None]:
# For each outcome: fit a stacked ensemble over its base models, save it to
# disk, then print preliminary train/validation AUROC.
for outcome_name, model_dict in MODEL_DICT.items():
    print(f"Working on: {outcome_name}...")
    outcome_data = OUTCOME_DICT[outcome_name]

    ### Fit Stack Model ###
    X_train, y_train = outcome_data["X_train"], outcome_data["y_train"]
    # Seeded, shuffled 5-fold stratified splitter handed to the stacker as `cv`.
    fold_splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
    # Logistic regression is the final estimator combining the base models.
    meta_learner = LogisticRegression(random_state=SEED)
    stack_model = StackingClassifier(
        estimators=list(model_dict.items()),  # (name, estimator) pairs
        final_estimator=meta_learner,
        cv=fold_splitter,
    )
    # The target is flattened to 1-D before fitting.
    stack_model.fit(X_train, y_train.values.ravel())

    ### Export Model ####
    model_export_path = MODEL_DIR / outcome_name / "stack.joblib"
    previous_export_found = model_export_path.exists()
    if previous_export_found:
        # Warn before replacing an earlier export so the overwrite is visible.
        warnings.warn(f"Over-writing models at path: {model_export_path}")
        model_export_path.unlink()
    joblib.dump(stack_model, model_export_path)

    ### Prelim results ###
    # AUROC is scored on column 1 of predict_proba (presumably the positive
    # class -- verify against the label encoding).
    # Train
    train_proba = stack_model.predict_proba(X_train)[:, 1]  # type: ignore
    train_score = roc_auc_score(y_train, train_proba)
    # Val
    X_val, y_val = outcome_data["X_val"], outcome_data["y_val"]
    val_proba = stack_model.predict_proba(X_val)[:, 1]  # type: ignore
    val_score = roc_auc_score(y_val, val_proba)

    print(f"Train AUROC: \t{train_score:.3f}")
    print(f"Val AUROC: \t{val_score:.3f}")