In [22]:
import numpy as np
import pandas as pd
import json
import joblib


In [23]:

from dataclasses import dataclass
from typing import List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, random_split

# >>> ADD
import os, json
from pathlib import Path
from datetime import datetime

class MLP(nn.Module):
    def __init__(
        self,
        in_features: int,
        hidden_layers: Tuple[int, ...],
        activation: str = "relu",
        dropout: float = 0.0,
        use_batchnorm: bool = False,
    ):
        super().__init__()
        acts = {
            "relu": nn.ReLU,
            "gelu": nn.GELU,
            "silu": nn.SiLU,
            "tanh": nn.Tanh,
        }
        if activation not in acts:
            raise ValueError(f"Unsupported activation: {activation}")

        layers: List[nn.Module] = []
        last = in_features
        for h in hidden_layers:
            layers.append(nn.Linear(last, h))
            if use_batchnorm:
                layers.append(nn.BatchNorm1d(h))
            layers.append(acts[activation]())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            last = h

        self.backbone = nn.Sequential(*layers) if layers else nn.Identity()
        self.out = nn.Linear(last, 1)

        # Kaiming init for ReLU-like, Xavier otherwise
        for m in self.modules():
            if isinstance(m, nn.Linear):
                if activation in ("relu", "silu"):
                    nn.init.kaiming_normal_(m.weight, nonlinearity="relu")
                else:
                    nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        z = self.backbone(x)
        logits = self.out(z)  # [B, 1]
        return logits

In [24]:
required_cols = ['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain', 'rhythm_flag', 'is_white', 'is_black', 'is_asian', 'is_hispanic', 'is_other_race', 'gender_F', 'gender_M', 'arrival_transport_AMBULANCE', 'arrival_transport_HELICOPTER', 'arrival_transport_OTHER', 'arrival_transport_UNKNOWN', 'arrival_transport_WALK IN', 'lactate', 'wbc', 'time_since_adm', 'gsn_16599.0', 'gsn_43952.0', 'gsn_4490.0', 'gsn_66419.0', 'gsn_61716.0']
your_input_dim = len(required_cols)
print(f"Input dimension: {your_input_dim}")

Input dimension: 28


In [25]:
with open("../../grid_runs/top5_results.json", "r") as f:
    top5 = json.load(f)

best_cfg = top5[0]["config"]

mlp_model = MLP(
    in_features=your_input_dim,
    hidden_layers=tuple(best_cfg["layers"]),
    activation=best_cfg["activation"],
    dropout=best_cfg["dropout"],
    use_batchnorm=best_cfg["batchnorm"]
)

state_dict = torch.load("../../grid_runs/best_model.pt", map_location="cpu")
mlp_model.load_state_dict(state_dict)
mlp_model.eval()

MLP(
  (backbone): Sequential(
    (0): Linear(in_features=28, out_features=128, bias=True)
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (out): Linear(in_features=64, out_features=1, bias=True)
)

In [26]:
import xgboost as xgb
import numpy as np

# Load CPU model
xgbmodel = xgb.XGBClassifier(device='cpu')  # works for XGBoost ≥ 2.0
xgbmodel.load_model("../../grid_runs_xgb/best_xgb.bin")


In [27]:
# model
model = joblib.load('../../best_logistic_regression_model.joblib')
model

0,1,2
,steps,"[('scaler', ...), ('clf', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,penalty,'elasticnet'
,dual,False
,tol,0.0001
,C,0.01
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'saga'
,max_iter,100


In [28]:
# read json
df = pd.read_json('src/mockData.json', lines=False)
df

Unnamed: 0,patientId,priorityRank,sepsisScore,hazardRate,lastVitalTime,location,temp,hr,resprate,o2sat,...,arrival_transport_WALK IN,lactate,wbc,time_since_adm,gsn_16599.0,gsn_43952.0,gsn_4490.0,gsn_66419.0,gsn_61716.0,trends
0,31408129,1,99.300003,0.200678,48,ICU North,97.8,91,28,95,...,0,1.7,17.7,14.166667,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.9, 'heartRate'..."
1,30154746,2,83.25,0.136532,42,Observation 3,99.7,88,16,98,...,1,1.8,16.9,11.666667,0,1,0,0,0,"[{'time': '3h ago', 'temp': 99.9, 'heartRate':..."
2,37560316,3,83.849998,0.103225,56,Med-Surg North,99.0,92,17,95,...,1,1.6,1.0,11.766667,0,0,0,0,0,"[{'time': '3h ago', 'temp': 99.4, 'heartRate':..."
3,30998534,4,99.5,0.102873,6,Stepdown West,102.8,123,20,99,...,0,2.9,8.0,3.95,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.1, 'heartRate'..."
4,31367180,5,3.9,0.101112,35,Telemetry A,98.1,88,16,100,...,1,1.6,11.1,3.0,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.8, 'heartRate'..."
5,39747582,6,94.75,0.085138,20,Med-Surg South,99.0,99,20,98,...,0,1.0,9.9,10.65,0,0,0,0,0,"[{'time': '3h ago', 'temp': 98.7, 'heartRate':..."
6,34897912,7,5.45,0.073321,240,Telemetry B,98.1,79,18,98,...,0,0.9,14.4,4.083333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 99.1, 'heartRate':..."
7,31391746,8,1.75,0.05356,14,ICU South,98.0,67,16,100,...,0,1.6,7.8,2.333333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.2, 'heartRate'..."
8,39788549,9,0.95,0.046085,22,Stepdown East,97.9,73,17,99,...,1,1.6,7.8,5.033333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.5, 'heartRate'..."
9,32858107,10,92.75,0.007099,1,Observation 1,97.8,105,19,96,...,0,1.7,8.1,12.283333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.3, 'heartRate'..."


In [29]:
# for each row, recalculate sepsis score
mod_df = df.rename(columns={"temp": "temperature", "hr": "heartrate"})
required_cols = ['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain', 'rhythm_flag', 'is_white', 'is_black', 'is_asian', 'is_hispanic', 'is_other_race', 'gender_F', 'gender_M', 'arrival_transport_AMBULANCE', 'arrival_transport_HELICOPTER', 'arrival_transport_OTHER', 'arrival_transport_UNKNOWN', 'arrival_transport_WALK IN', 'lactate', 'wbc', 'time_since_adm', 'gsn_16599.0', 'gsn_43952.0', 'gsn_4490.0', 'gsn_66419.0', 'gsn_61716.0']
X = mod_df[required_cols]
time_since_check = df["lastVitalTime"].values/60

In [30]:
# infer with this X
sepsis_scores = model.predict_proba(X)[:, 1]
sepsis_scores = sepsis_scores.round(3) * 100
sepsis_scores



array([97.9, 73. , 29.5, 85.4, 12.3, 83.2, 23.1,  7.2,  6. , 85.6])

In [31]:
with torch.no_grad():
    X_test = torch.tensor(X.to_numpy(), dtype=torch.float32)
    logits = mlp_model(X_test)
    probs = torch.sigmoid(logits)

probs = probs.squeeze().numpy()
probs = probs.round(3) * 100
probs

array([98.6     , 87.2     , 70.700005, 99.      ,  6.3     , 95.      ,
        9.6     ,  3.1     ,  1.7     , 92.799995], dtype=float32)

In [32]:
xgb_probs = xgbmodel.predict_proba(X)[:, 1]
xgb_probs = xgb_probs.round(3) * 100
xgb_probs

array([100.       ,  79.299995 ,  97.       , 100.       ,   1.5      ,
        94.5      ,   1.3000001,   0.4      ,   0.2      ,  92.7      ],
      dtype=float32)

In [33]:
sepsis_scores = np.add(probs, xgb_probs) / 2
sepsis_scores = sepsis_scores.round(3)
sepsis_scores

array([99.3 , 83.25, 83.85, 99.5 ,  3.9 , 94.75,  5.45,  1.75,  0.95,
       92.75], dtype=float32)

In [34]:
# update df with new sepsis scores
df['sepsisScore'] = sepsis_scores
df

Unnamed: 0,patientId,priorityRank,sepsisScore,hazardRate,lastVitalTime,location,temp,hr,resprate,o2sat,...,arrival_transport_WALK IN,lactate,wbc,time_since_adm,gsn_16599.0,gsn_43952.0,gsn_4490.0,gsn_66419.0,gsn_61716.0,trends
0,31408129,1,99.300003,0.200678,48,ICU North,97.8,91,28,95,...,0,1.7,17.7,14.166667,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.9, 'heartRate'..."
1,30154746,2,83.25,0.136532,42,Observation 3,99.7,88,16,98,...,1,1.8,16.9,11.666667,0,1,0,0,0,"[{'time': '3h ago', 'temp': 99.9, 'heartRate':..."
2,37560316,3,83.849998,0.103225,56,Med-Surg North,99.0,92,17,95,...,1,1.6,1.0,11.766667,0,0,0,0,0,"[{'time': '3h ago', 'temp': 99.4, 'heartRate':..."
3,30998534,4,99.5,0.102873,6,Stepdown West,102.8,123,20,99,...,0,2.9,8.0,3.95,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.1, 'heartRate'..."
4,31367180,5,3.9,0.101112,35,Telemetry A,98.1,88,16,100,...,1,1.6,11.1,3.0,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.8, 'heartRate'..."
5,39747582,6,94.75,0.085138,20,Med-Surg South,99.0,99,20,98,...,0,1.0,9.9,10.65,0,0,0,0,0,"[{'time': '3h ago', 'temp': 98.7, 'heartRate':..."
6,34897912,7,5.45,0.073321,240,Telemetry B,98.1,79,18,98,...,0,0.9,14.4,4.083333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 99.1, 'heartRate':..."
7,31391746,8,1.75,0.05356,14,ICU South,98.0,67,16,100,...,0,1.6,7.8,2.333333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.2, 'heartRate'..."
8,39788549,9,0.95,0.046085,22,Stepdown East,97.9,73,17,99,...,1,1.6,7.8,5.033333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.5, 'heartRate'..."
9,32858107,10,92.75,0.007099,1,Observation 1,97.8,105,19,96,...,0,1.7,8.1,12.283333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.3, 'heartRate'..."


In [35]:
# output to json
df.to_json("src/mockData.json", orient="records", lines=False, indent=1)

In [36]:
import pickle, pandas as pd
from lifelines import CoxPHFitter, CoxTimeVaryingFitter

# Load model
with open("../../cox_models/coxph_static.pkl", "rb") as f:
    cph: CoxPHFitter = pickle.load(f)

# Load scaler + feature list
with open("../../cox_models/coxph_static_scaler.pkl", "rb") as f:
    payload = pickle.load(f)
scaler = payload["scaler"]
feats = payload["features"]

with open("../../cox_models/cox_tvc.pkl", "rb") as f:
    ctv: CoxTimeVaryingFitter = pickle.load(f)

payload_tvc = pickle.load(open("../../cox_models/cox_tvc_scaler.pkl", "rb"))

# # Prepare new data frame `df_new` with the same columns
# df_new_scaled = df_new.copy()
# df_new_scaled.loc[:, feats] = scaler.transform(df_new_scaled[feats])

# # Predict (example)
# risk = cph.predict_partial_hazard(df_new_scaled)


In [37]:


# Prepare new data frame `df_new` with the same columns
cph_X = X.copy()
cph_X.loc[:, feats] = scaler.transform(cph_X[feats])

# Predict (example)
risk = cph.predict_partial_hazard(cph_X)


 -0.27664335 -0.97677547 -0.62670941  1.24030958]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  cph_X.loc[:, feats] = scaler.transform(cph_X[feats])
  0.10537244 -0.76592421 -0.33027588  0.54102076]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  cph_X.loc[:, feats] = scaler.transform(cph_X[feats])
  0.06832232  0.8248324   0.44657736 -0.68818777]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  cph_X.loc[:, feats] = scaler.transform(cph_X[feats])
  0.10515665 -0.70419841  0.81929346 -1.41833522]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  cph_X.loc[:, feats] = scaler.transform(cph_X[feats])
  0.08353478 -0.35372469  1.24956003  0.66654741]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  cph_X.loc[:, feats] = scaler.transform(cph_X[feats])
 -0.59630786 -0.59630786  0.9503467

In [38]:
def prepare_static_inference_frame(raw_df: pd.DataFrame, scaler_payload: dict) -> pd.DataFrame:
    """
    Returns a *new* DataFrame where feature columns are scaled (float64) and
    non-feature columns are preserved. No in-place assignment into raw_df.
    """
    feats  = scaler_payload["features"]
    scaler = scaler_payload["scaler"]

    # ensure float for transform input
    X = scaler.transform(raw_df[feats].to_numpy(dtype="float64"))

    X_df = pd.DataFrame(X, index=raw_df.index, columns=feats)
    non_feats = raw_df.drop(columns=feats)

    return pd.concat([non_feats, X_df], axis=1)

def prepare_tvc_inference_frame(raw_df: pd.DataFrame, scaler_payload: dict) -> pd.DataFrame:
    feats  = scaler_payload["features"]
    scaler = scaler_payload["scaler"]

    X = scaler.transform(raw_df[feats].to_numpy(dtype="float64"))
    X_df = pd.DataFrame(X, index=raw_df.index, columns=feats)
    non_feats = raw_df.drop(columns=feats)

    return pd.concat([non_feats, X_df], axis=1)


df_pred_cox = prepare_static_inference_frame(X, payload)
df_pred_tvc = prepare_tvc_inference_frame(X, payload_tvc)

print(df_pred_cox.head())
print(df_pred_tvc.head())

   gender_F  gender_M  arrival_transport_AMBULANCE  \
0         0         1                            1   
1         0         1                            0   
2         0         1                            0   
3         1         0                            0   
4         0         1                            0   

   arrival_transport_HELICOPTER  arrival_transport_OTHER  \
0                             0                        0   
1                             0                        0   
2                             0                        0   
3                             0                        0   
4                             0                        0   

   arrival_transport_UNKNOWN  arrival_transport_WALK IN  temperature  \
0                          0                          0    -0.110836   
1                          0                          1     0.538579   
2                          0                          1     0.299321   
3                         

In [39]:

scores_cox_matrix = cph.predict_cumulative_hazard(df_pred_cox, times=time_since_check)

# create dict for index and time
time_since_check_dict = dict(zip(time_since_check.astype("float64").tolist(), range(df.shape[0])))
indices = list(time_since_check_dict.items())
print("Indices:", indices)


scores_cox = np.array([scores_cox_matrix.loc[i] for i in indices])



print("CoxPH scores:", scores_cox)


Indices: [(0.8, 0), (0.7, 1), (0.9333333333333333, 2), (0.1, 3), (0.5833333333333334, 4), (0.3333333333333333, 5), (4.0, 6), (0.23333333333333334, 7), (0.36666666666666664, 8), (0.016666666666666666, 9)]
CoxPH scores: [0.20067824 0.13653168 0.10322495 0.1028728  0.10111244 0.08513778
 0.4290144  0.05356048 0.04608452 0.00709931]


In [40]:
df["hazardRate"] = scores_cox


In [41]:
# get priority rank based on scores
df["priorityRank"] = df["hazardRate"].rank(ascending=False).astype(int)

# sort location by priority rank
df = df.sort_values(by="priorityRank")
df

Unnamed: 0,patientId,priorityRank,sepsisScore,hazardRate,lastVitalTime,location,temp,hr,resprate,o2sat,...,arrival_transport_WALK IN,lactate,wbc,time_since_adm,gsn_16599.0,gsn_43952.0,gsn_4490.0,gsn_66419.0,gsn_61716.0,trends
6,34897912,1,5.45,0.429014,240,Telemetry B,98.1,79,18,98,...,0,0.9,14.4,4.083333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 99.1, 'heartRate':..."
0,31408129,2,99.300003,0.200678,48,ICU North,97.8,91,28,95,...,0,1.7,17.7,14.166667,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.9, 'heartRate'..."
1,30154746,3,83.25,0.136532,42,Observation 3,99.7,88,16,98,...,1,1.8,16.9,11.666667,0,1,0,0,0,"[{'time': '3h ago', 'temp': 99.9, 'heartRate':..."
2,37560316,4,83.849998,0.103225,56,Med-Surg North,99.0,92,17,95,...,1,1.6,1.0,11.766667,0,0,0,0,0,"[{'time': '3h ago', 'temp': 99.4, 'heartRate':..."
3,30998534,5,99.5,0.102873,6,Stepdown West,102.8,123,20,99,...,0,2.9,8.0,3.95,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.1, 'heartRate'..."
4,31367180,6,3.9,0.101112,35,Telemetry A,98.1,88,16,100,...,1,1.6,11.1,3.0,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.8, 'heartRate'..."
5,39747582,7,94.75,0.085138,20,Med-Surg South,99.0,99,20,98,...,0,1.0,9.9,10.65,0,0,0,0,0,"[{'time': '3h ago', 'temp': 98.7, 'heartRate':..."
7,31391746,8,1.75,0.05356,14,ICU South,98.0,67,16,100,...,0,1.6,7.8,2.333333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 100.2, 'heartRate'..."
8,39788549,9,0.95,0.046085,22,Stepdown East,97.9,73,17,99,...,1,1.6,7.8,5.033333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.5, 'heartRate'..."
9,32858107,10,92.75,0.007099,1,Observation 1,97.8,105,19,96,...,0,1.7,8.1,12.283333,0,0,0,0,0,"[{'time': '3h ago', 'temp': 101.3, 'heartRate'..."


In [42]:
df.to_json("src/mockData.json", orient="records", lines=False, indent=1)

In [45]:
history = df["trends"]
history[0]

[{'time': '3h ago', 'temp': 101.9, 'heartRate': 134, 'lactate': 3.3},
 {'time': '2h ago', 'temp': 101.6, 'heartRate': 131, 'lactate': 3.1},
 {'time': '1h ago', 'temp': 101.4, 'heartRate': 129, 'lactate': 2.9},
 {'time': 'Now', 'temp': 101.3, 'heartRate': 128, 'lactate': 2.8}]

In [50]:
# make sure last entry in trends is the same as current temp, heartRate, lactate
history[0][-1]["temp"] = df.loc[0, "temp"]
history[0][-1]["heartRate"] = df.loc[0, "hr"]
history[0][-1]["lactate"] = df.loc[0, "lactate"]

In [52]:
# do this for all rows
for i in range(len(df)):
    df["trends"][i][-1]["temp"] = df.loc[i, "temp"]
    df["trends"][i][-1]["heartRate"] = df.loc[i, "hr"]
    df["trends"][i][-1]["lactate"] = df.loc[i, "lactate"]

In [53]:
df.to_json("src/mockData.json", orient="records", lines=False, indent=1)