In [2]:
import pandas as pd
from pathlib import Path

# Location of the spreadsheet, relative to the notebook's working directory.
DATA_PATH = Path("../data/MSDS_database_cleaned_deidentified_revised.xlsx")
df = pd.read_excel(DATA_PATH)

# Tidy the header row: force labels to strings, turn embedded newlines into
# spaces, collapse runs of whitespace to one space, and trim both ends.
df.columns = (
    df.columns
    .astype(str)
    .str.replace("\n", " ")
    .str.replace(r"\s+", " ", regex=True)
    .str.strip()
)
df.shape

(120, 93)

In [3]:
# Name of the outcome column; the tally below keeps NaN as its own bucket so
# unlabeled rows are visible.
target = "mech_fail_last"
df.loc[:, target].value_counts(dropna=False)


mech_fail_last
0.0    90
1.0    29
NaN     1
Name: count, dtype: int64

In [4]:
# Distribution of the ALIF column (a single distinct value here would mean the
# column carries no signal for a model).
df.loc[:, "ALIF"].value_counts()

ALIF
1    120
Name: count, dtype: int64

In [5]:
# Candidate predictor columns, grouped into the two lists used throughout the
# notebook (names must match the normalized spreadsheet headers).
preop_cols = [
    'age', 'sex', 'PI_preop', 'PT_preop', 'LL_preop', 'SS_preop',
    'T4PA_preop', 'L1PA_preop', 'SVA_preop', 'cobb_main_curve_preop',
    'FC_preop', 'tscore_femneck_preop', 'HU_UIV_preop',
    'HU_UIVplus1_preop', 'HU_UIVplus2_preop', 'num_levels',
]

plan_cols = [
    "UIV_implant", "num_fused_levels", "ALIF", "XLIF",
    "TLIF", "num_rods", "num_screws", "osteotomy",
]

# Candidates that are absent from the sheet are silently left out.
features = [name for name in preop_cols + plan_cols if name in df.columns]

# Rows without an outcome label cannot be used for training or scoring.
mask = df[target].notna()
X = df.loc[mask, features].copy()
y = df.loc[mask, target].astype(int)

X.shape, y.value_counts()


((119, 24),
 mech_fail_last
 0    90
 1    29
 Name: count, dtype: int64)

In [6]:
# Echo the first group of candidate feature names for a visual check.
preop_cols

['age',
 'sex',
 'PI_preop',
 'PT_preop',
 'LL_preop',
 'SS_preop',
 'T4PA_preop',
 'L1PA_preop',
 'SVA_preop',
 'cobb_main_curve_preop',
 'FC_preop',
 'tscore_femneck_preop',
 'HU_UIV_preop',
 'HU_UIVplus1_preop',
 'HU_UIVplus2_preop',
 'num_levels']

In [7]:
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression

# Route columns by dtype. Numeric and boolean columns go through the numeric
# branch; everything else (object, pandas string dtype, category, ...) is
# treated as categorical. Testing `dtype == "object"` alone would send
# string-dtype or category columns to the median imputer, which cannot handle
# them.
num_cols = X.select_dtypes(include=["number", "bool"]).columns.tolist()
cat_cols = [c for c in X.columns if c not in num_cols]

# Numeric: median-impute then standardize. Categorical: mode-impute then
# one-hot encode, ignoring levels that were not seen while fitting.
preprocess = ColumnTransformer(
    transformers=[
        ("num", Pipeline([("imputer", SimpleImputer(strategy="median")),
                          ("scaler", StandardScaler())]), num_cols),
        ("cat", Pipeline([("imputer", SimpleImputer(strategy="most_frequent")),
                          ("onehot", OneHotEncoder(handle_unknown="ignore"))]), cat_cols),
    ]
)

# class_weight="balanced" reweights the classes inversely to their frequency.
model = LogisticRegression(max_iter=2000, class_weight="balanced")
pipe = Pipeline([("preprocess", preprocess), ("model", model)])

# Preprocessing lives inside the pipeline, so imputation/scaling statistics
# are re-estimated on each training fold only.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Out-of-fold probability of the second class (label 1) for every row.
probs = cross_val_predict(pipe, X, y, cv=cv, method="predict_proba")[:, 1]
probs[:10]


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + 

array([0.78484407, 0.22007698, 0.22457858, 0.71394014, 0.34797104,
       0.42914456, 0.54416782, 0.13033537, 0.52303551, 0.28779948])

In [8]:
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, precision_score, recall_score, f1_score

# Threshold-free ranking metrics computed from the out-of-fold probabilities.
auc = roc_auc_score(y, probs)
ap = average_precision_score(y, probs)

# Hard labels at a 0.5 cut-off, and the resulting 2x2 confusion counts
# (rows = true class, columns = predicted class).
preds = np.where(probs >= 0.5, 1, 0)
(tn, fp), (fn, tp) = confusion_matrix(y, preds)

precision_at_half = precision_score(y, preds, zero_division=0)
recall_at_half = recall_score(y, preds, zero_division=0)
f1_at_half = f1_score(y, preds, zero_division=0)

print("ROC-AUC:", auc)
print("Avg Precision (PR-AUC):", ap)
print("Precision:", precision_at_half)
print("Recall:", recall_at_half)
print("F1:", f1_at_half)
print({"tn": tn, "fp": fp, "fn": fn, "tp": tp})


ROC-AUC: 0.5739463601532567
Avg Precision (PR-AUC): 0.32398384205601666
Precision: 0.2826086956521739
Recall: 0.4482758620689655
F1: 0.3466666666666667
{'tn': 57, 'fp': 33, 'fn': 16, 'tp': 13}


In [9]:
# Attach the out-of-fold probability to every labeled row of the original table.
results = df.loc[mask].assign(pred_fail_prob_cv=probs)

# Show the 15 highest-scored rows next to their observed outcome.
(
    results[["pred_fail_prob_cv", "mech_fail_last"]]
    .sort_values("pred_fail_prob_cv", ascending=False)
    .head(15)
)


Unnamed: 0,pred_fail_prob_cv,mech_fail_last
79,0.988648,0.0
52,0.961685,1.0
22,0.950776,1.0
28,0.946098,0.0
21,0.942694,0.0
47,0.927672,1.0
83,0.910086,0.0
29,0.888469,0.0
101,0.886764,1.0
18,0.875918,0.0


In [10]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

# Evaluate the same out-of-fold probabilities at several decision cut-offs and
# collect one summary record per cut-off.
thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
rows = []
for t in thresholds:
    preds_t = (probs >= t).astype(int)
    tn, fp, fn, tp = confusion_matrix(y, preds_t).ravel()
    row = dict(
        threshold=t,
        precision=precision_score(y, preds_t, zero_division=0),
        recall=recall_score(y, preds_t, zero_division=0),
        f1=f1_score(y, preds_t, zero_division=0),
        tp=tp,
        fp=fp,
        tn=tn,
        fn=fn,
    )
    rows.append(row)

pd.DataFrame(rows)


Unnamed: 0,threshold,precision,recall,f1,tp,fp,tn,fn
0,0.2,0.275862,0.827586,0.413793,24,63,27,5
1,0.3,0.26087,0.62069,0.367347,18,51,39,11
2,0.4,0.272727,0.517241,0.357143,15,40,50,14
3,0.5,0.282609,0.448276,0.346667,13,33,57,16
4,0.6,0.314286,0.37931,0.34375,11,24,66,18
5,0.7,0.291667,0.241379,0.264151,7,17,73,22


In [11]:
# Fit on all data for demonstration (not evaluation)
pipe.fit(X, y)

# pick one patient row
i = X.index[0]
x0 = X.loc[[i]].copy()

p_base = pipe.predict_proba(x0)[:, 1][0]

# Toggle ALIF if it exists. The flip (1 - value) only makes sense for a 0/1
# coded value, so anything else (including NaN) is left untouched instead of
# crashing or producing an out-of-range input.
x1 = x0.copy()
if "ALIF" in x1.columns:
    alif_value = x1["ALIF"].iloc[0]
    if pd.isna(alif_value) or alif_value not in (0, 1):
        print("Note: ALIF is missing or not coded 0/1 for this patient; left unchanged.")
    else:
        x1["ALIF"] = 1 - int(alif_value)
        # A column with a single distinct value in the training data gives the
        # model nothing to learn from, so the "toggled" risk below is not
        # informative (expect a change of 0).
        if X["ALIF"].nunique(dropna=True) < 2:
            print("Note: ALIF has a single value in the training data; "
                  "the fitted model has no information about its effect.")

p_new = pipe.predict_proba(x1)[:, 1][0]

print("Base risk:", p_base)
print("Toggled ALIF risk:", p_new)
print("Change:", p_new - p_base)


Base risk: 0.1131948675274806
Toggled ALIF risk: 0.1131948675274806
Change: 0.0


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights


In [12]:
# VG added to save the model

from pathlib import Path
import joblib

# Output folder for serialized artifacts; created on demand.
ARTIFACT_DIR = Path("../artifacts")
ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)

# Persist the pipeline together with the feature list and the target name.
model_path = ARTIFACT_DIR / "mech_fail_model.joblib"
bundle = dict(pipe=pipe, features=features, target="mech_fail_last")

joblib.dump(bundle, model_path)
print("Saved:", model_path)


Saved: ../artifacts/mech_fail_model.joblib


In [13]:
# Fit on all labeled rows (X, y) for demonstration only: any prediction made
# with this fit on rows of X is in-sample and must not be read as an estimate
# of out-of-sample performance.
pipe.fit(X, y)

def toggle_and_diff(col, n=10, data=None, estimator=None):
    """Flip a 0/1 feature on the first ``n`` rows and return the change in predicted probability.

    Parameters
    ----------
    col : str
        Name of the column to toggle. If it is not a column of the feature
        table, an empty list is returned.
    n : int, default 10
        Number of leading rows of the feature table to consider.
    data : pandas.DataFrame, optional
        Feature table to take rows from. Defaults to the notebook-level ``X``.
    estimator : object with ``predict_proba``, optional
        Fitted classifier. Defaults to the notebook-level ``pipe``.

    Returns
    -------
    list of float
        For every usable row, the second ``predict_proba`` column after the
        toggle minus the same column before the toggle, in row order.

    Notes
    -----
    Rows where ``col`` is missing are skipped silently. Rows where ``col``
    holds something other than 0/1 are skipped with a warning, because
    ``1 - value`` is not a meaningful flip for them. Errors raised by the
    estimator are propagated rather than swallowed, so a broken model cannot
    masquerade as "no rows changed".
    """
    import warnings

    frame = X if data is None else data
    clf = pipe if estimator is None else estimator

    if col not in frame.columns:
        return []

    head = frame.iloc[:n]

    # Positions (within `head`) of rows whose value can actually be flipped.
    keep = []
    n_not_binary = 0
    for pos, value in enumerate(head[col].tolist()):
        if pd.isna(value):
            continue
        if value in (0, 1):
            keep.append(pos)
        else:
            n_not_binary += 1

    if n_not_binary:
        warnings.warn(
            f"toggle_and_diff: skipped {n_not_binary} row(s) where {col!r} is not coded 0/1",
            stacklevel=2,
        )
    if not keep:
        return []

    base_rows = head.iloc[keep]
    toggled_rows = base_rows.copy()
    toggled_rows[col] = 1 - toggled_rows[col].astype(int)

    # Score all rows in two batched calls instead of two calls per row.
    base = np.asarray(clf.predict_proba(base_rows))[:, 1]
    new = np.asarray(clf.predict_proba(toggled_rows))[:, 1]
    return (new - base).tolist()

# For each 0/1 plan flag present in X, flip it on the first 30 rows and report
# how many predictions moved and by how much on average.
for c in ("ALIF", "TLIF", "XLIF"):
    if c not in X.columns:
        continue
    d = toggle_and_diff(c, n=30)
    n_changed = sum(abs(delta) > 1e-6 for delta in d)
    mean_abs_change = np.mean(np.abs(d)) if d else None
    print(c, "nonzero diffs:", n_changed, "avg abs change:", mean_abs_change)


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights


ALIF nonzero diffs: 0 avg abs change: 0.0
TLIF nonzero diffs: 30 avg abs change: 0.5097843004143388
XLIF nonzero diffs: 30 avg abs change: 0.009682999727538446


In [14]:
import numpy as np
import pandas as pd

# Train on every labeled row so the scoring helper has a fitted pipeline.
pipe.fit(X, y)

# Column order the pipeline was fitted with; scoring inputs are aligned to it.
FEATURES = X.columns.tolist()

def score_mech_fail(preop_plan_dict: dict) -> float:
    """
    Score a single patient/plan record with the fitted pipeline.

    Input: mapping from feature name to value; only keys listed in FEATURES
    are read, and any feature that is absent is filled with NaN so the
    pipeline's imputers handle it.
    Output: the second column of ``pipe.predict_proba`` for that record,
    as a plain Python float.
    """
    record = {}
    for name in FEATURES:
        record[name] = preop_plan_dict.get(name, np.nan)

    # Single-row frame with columns in the order the pipeline was fitted on.
    X_new = pd.DataFrame([record], columns=FEATURES)
    proba = pipe.predict_proba(X_new)
    return float(proba[0, 1])

# Demo: score the first row of X, passed in as a plain dict.
demo_dict = X.iloc[0].to_dict()
demo_risk = score_mech_fail(demo_dict)
print("Predicted risk:", demo_risk)


Predicted risk: 0.1131948675274806


  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  raw_prediction = X @ weights + intercept
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
  grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
