In [1]:
# xgb_discrete_inference.py
import json
from pathlib import Path
import numpy as np
import pandas as pd
import xgboost as xgb
import pickle

# -----------------------------
# Config
# -----------------------------
DASHBOARD_JSON = Path("src/mockData.json")
MODEL_JSON     = Path("../../outputs_discrete_xgb/xgb_discrete_model.json")   # from training script
FEATS_PKL      = Path("../../outputs_discrete_xgb/xgb_discrete_features.pkl") # contains {'order':[t_bin]+feats, 'base_features':[...]}
OUT_JSON       = Path("src/mockData_xgb.json")                                         # overwrite in place

# Your input schema -> rename to match training columns if needed
RENAME_MAP = {
    "temp": "temperature",
    "hr":   "heartrate",
    # add more renames here if needed
}

# Discrete-time bin config
BIN_MINUTES      = 30.0
HORIZON_MINUTES  = 30.0   # per-bin risk you want to rank by (default: next 30m -> t_bin=1)

TBIN_COL   = "t_bin"
EVENT_COL  = "event"      # not used in inference
ID_COL     = "stay_id"    # optional; only for stable sorting if present

# -----------------------------
# Helpers
# -----------------------------
def _coerce_numeric_inplace(df: pd.DataFrame, cols: list[str]) -> None:
    for c in cols:
        if c not in df.columns:
            df[c] = 0.0
        if pd.api.types.is_bool_dtype(df[c]):
            continue
        if not pd.api.types.is_numeric_dtype(df[c]):
            df[c] = pd.to_numeric(df[c], errors="coerce")
        if not pd.api.types.is_bool_dtype(df[c]):
            df[c] = df[c].astype("float32")
    df[cols] = df[cols].fillna(0)

def _compute_tbin(horizon_minutes: float, bin_minutes: float) -> int:
    # bin index is 1-based: (0..30] -> 1, (30..60] -> 2, etc.
    return int(np.ceil(max(1e-9, horizon_minutes) / bin_minutes))


In [2]:

# -----------------------------
# Load model + features
# -----------------------------
if not MODEL_JSON.exists() or not FEATS_PKL.exists():
    raise FileNotFoundError("Trained XGB model or feature list not found. Check paths in Config.")

xgbmodel = xgb.XGBClassifier(device="cpu")
xgbmodel.load_model(MODEL_JSON.as_posix())

with open(FEATS_PKL, "rb") as f:
    payload = pickle.load(f)
ORDER = payload["order"]            # [t_bin] + base_features (training order)
BASE_FEATURES = payload["base_features"]




In [3]:
# -----------------------------
# Load dashboard data
# -----------------------------
df = pd.read_json(DASHBOARD_JSON, lines=False)
mod_df = df.rename(columns=RENAME_MAP).copy()

# Ensure all model features exist
for c in BASE_FEATURES:
    if c not in mod_df.columns:
        mod_df[c] = 0.0  # missing -> 0; change if you prefer another imputation

# Coerce numerics
_coerce_numeric_inplace(mod_df, BASE_FEATURES)



In [4]:
# Build desired time bin (per-bin, not cumulative)
tbin = _compute_tbin(HORIZON_MINUTES, BIN_MINUTES)
G = mod_df[BASE_FEATURES].copy()
G.insert(0, TBIN_COL, np.full(len(G), tbin, dtype=np.int16))

# Ensure column order matches training
need_order = [TBIN_COL] + BASE_FEATURES
if ORDER and isinstance(ORDER, list):
    # Training saved order takes precedence
    need_order = ORDER



In [6]:
# Predict per-bin risk (probability of event in this bin)
probs = xgbmodel.predict_proba(G[need_order])[:, 1]
probs_pct = (np.round(probs, 3) * 100).astype(float)
probs_pct

array([77.5       , 68.69999695, 32.79999924, 46.59999847, 89.6000061 ,
       32.20000076, 46.        , 31.09999847,  3.5999999 , 36.09999847])

In [7]:

# -----------------------------
# Update dashboard fields
# -----------------------------
# Example: keep your previous ensemble score if you want, but here we
# add a pure XGB per-bin risk and rank by it.
df["xgbRiskPct_bin"] = probs_pct
df["xgbRisk_bin_idx"] = tbin  # which bin this probability refers to
df["priorityRank"] = df["xgbRiskPct_bin"].rank(ascending=False, method="first").astype(int)

# (Optional) Keep your trends updated like in your Cox code
for i in range(len(df)):
    # Defensive guards in case the structure varies
    try:
        df.at[i, "trends"][-1]["temp"] = df.at[i, "temp"]
        df.at[i, "trends"][-1]["heartRate"] = df.at[i, "hr"]
        if "lactate" in df.columns:
            df.at[i, "trends"][-1]["lactate"] = df.at[i, "lactate"]
    except Exception:
        pass

# Sort by priority and write back
#df = df.sort_values(by="priorityRank", kind="mergesort").reset_index(drop=True)
df.to_json(OUT_JSON, orient="records", lines=False, indent=1)

print(f"✅ Updated {OUT_JSON}")
print(f"   Used per-bin risk at horizon={HORIZON_MINUTES} min (t_bin={tbin})")

✅ Updated src/mockData_xgb.json
   Used per-bin risk at horizon=30.0 min (t_bin=1)
