In [None]:
# Cell 1: imports and Snowflake connection
import os

import joblib
import pandas as pd
import snowflake.connector as sf
from sklearn.metrics import roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from snowflake.connector.pandas_tools import write_pandas
from xgboost import XGBClassifier

# Credentials are read from the environment instead of being written into the
# notebook, so they never end up in the saved file or in version control.
# Required variables: SNOWFLAKE_USER, SNOWFLAKE_PASSWORD, SNOWFLAKE_ACCOUNT,
# SNOWFLAKE_WAREHOUSE. A missing variable raises KeyError here, before any
# connection attempt is made.
conn = sf.connect(
    user=os.environ["SNOWFLAKE_USER"],
    password=os.environ["SNOWFLAKE_PASSWORD"],
    account=os.environ["SNOWFLAKE_ACCOUNT"],
    warehouse=os.environ["SNOWFLAKE_WAREHOUSE"],
    database="RCM_LAB",
    schema="PUBLIC",
)


In [None]:
# Cell 2: load feature table
query = "SELECT * FROM CLAIM_FEATURES"

# Fetch through the connector's own cursor rather than pd.read_sql: pandas
# only supports SQLAlchemy connectables (or sqlite3) and warns on other raw
# DBAPI connections. The context manager closes the cursor once the result
# has been materialised as a DataFrame.
with conn.cursor() as cur:
    cur.execute(query)
    df = cur.fetch_pandas_all()

# Fail here, next to the query, if the label column used by the following
# cells is absent from the result.
assert "DENIAL_FLAG" in df.columns, "CLAIM_FEATURES result has no DENIAL_FLAG column"

# Bare last expression so the notebook renders the preview as a rich table.
df.head()


In [None]:
# Cell 3: prepare features and labels
# Label vector: the DENIAL_FLAG column exactly as loaded.
y = df["DENIAL_FLAG"]

# Feature matrix: every remaining column, run through pandas' one-hot encoder.
feature_frame = df.drop(columns=["DENIAL_FLAG"])
X = pd.get_dummies(feature_frame)

# 80/20 hold-out split, stratified on the label, with a fixed seed so the
# partition is the same on every run.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Quick sanity output: training matrix shape and mean of the training labels.
print(X_train.shape, y_train.mean())


In [None]:
# Cell 4: model training
# Hyperparameters are gathered in one mapping so they can be read (or logged)
# in a single place before the estimator is constructed.
xgb_params = {
    "n_estimators": 300,
    "learning_rate": 0.05,
    "max_depth": 6,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
}
model = XGBClassifier(**xgb_params)

# Fit on the training partition; as the last expression, the fitted
# estimator's repr is what the cell displays.
model.fit(X_train, y_train)


In [None]:
# Cell 5: evaluation
# Column 1 of predict_proba is kept as the score; a prediction is 1 when that
# score is strictly greater than the cut-off.
threshold = 0.5
class_probs = model.predict_proba(X_test)
probs = class_probs[:, 1]
preds = (probs > threshold).astype(int)

# AUC is computed from the raw scores, the report from the thresholded labels.
auc = roc_auc_score(y_test, probs)
report = classification_report(y_test, preds)
print("AUC:", auc)
print(report)


In [None]:
# Cell 6: save model locally
# The target path is named once; its parent directory is created if it does
# not exist yet, then the fitted model is serialised there with joblib.
model_path = "../models/denial_xgb.pkl"
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)


In [None]:
# Cell 7: optional – push predictions back to Snowflake
# Score table: the held-out feature rows plus the model's score and label.
pred_df = X_test.copy()
pred_df["PROB_DENIAL"] = probs
pred_df["PRED_DENIAL"] = preds

# write_pandas is a module-level function in snowflake.connector.pandas_tools
# (not a method on the connection) and takes the connection as its first
# argument. It returns (success, nchunks, nrows, output); only the first two
# are used here, the rest are absorbed by the starred name.
#
# auto_create_table=True together with overwrite=True drops and recreates the
# table on each run, so the upload works when the table does not exist yet and
# when the one-hot column set has changed since the previous run.
success, nchunks, *_ = write_pandas(
    conn,
    pred_df.reset_index(drop=True),
    table_name="PRED_DENIAL_SCORES",
    database="RCM_LAB",
    schema="PUBLIC",
    auto_create_table=True,
    overwrite=True,
)
if not success:
    raise RuntimeError("write_pandas reported that not all chunks were loaded")
print("Uploaded", nchunks, "chunks")
