In [1]:
import os

# 1. Setup Data Paths
nhanes_step_count_dir = "./data/nhanes-step-count/"
subject_info_path = os.path.join(nhanes_step_count_dir, "subject-info.csv")
actisteps_path = os.path.join(nhanes_step_count_dir, "nhanes_1440_actisteps.csv.xz")
ac_path = os.path.join(nhanes_step_count_dir, "nhanes_1440_AC.csv.xz")
mims_path = os.path.join(nhanes_step_count_dir, "nhanes_1440_PAXMTSM.csv.xz")

nhanes_lab_dir = "./data/nhanes-lab/"
ghb_path_2011 = os.path.join(nhanes_lab_dir, "ghb-2011-12.xpt")
ghb_path_2013 = os.path.join(nhanes_lab_dir, "ghb-2013-14.xpt")

nhanes_questionnaire_dir = "./data/nhanes-questionnaire/"
bpq_path_2011 = os.path.join(nhanes_questionnaire_dir, "bpq-2011-12.xpt")
bpq_path_2013 = os.path.join(nhanes_questionnaire_dir, "bpq-2013-14.xpt")

In [7]:
import pandas as pd

# 2. Load NHANES Step-Count Data
print("Loading Wearable Movement Data... (this may take a few minutes)")

subj_df = pd.read_csv(subject_info_path, dtype={"SEQN": "Int64"})
actisteps_df = pd.read_csv(actisteps_path, dtype={"SEQN": "Int64"}, low_memory=False)
ac_df = pd.read_csv(ac_path, dtype={"SEQN": "Int64"}, low_memory=False)
mims_df = pd.read_csv(mims_path, dtype={"SEQN": "Int64"}, low_memory=False)

print("Wearable Data Shape:")
print("- Subject Info:", subj_df.shape)
print("- Actisteps:", actisteps_df.shape)
print("- Activity Counts:", ac_df.shape)
print("- MIMS:", mims_df.shape)

Loading Wearable Movement Data... (this may take a few minutes)
Wearable Data Shape:
- Subject Info: (19931, 8)
- Actisteps: (130186, 1443)
- Activity Counts: (130186, 1443)
- MIMS: (130186, 1443)


In [5]:
# 3. Feature Engineering for NHANES Step-Count Data
print("Computing Wearable Data Features...")

# Compute Daily Step Statistics
actisteps_minute_cols = [c for c in actisteps_df.columns if c.startswith("min_")]
actisteps_df["daily_steps"] = actisteps_df[actisteps_minute_cols].sum(axis=1, numeric_only=True)
actisteps_df["valid_day"] = actisteps_df["daily_steps"] > 0
actisteps_df = actisteps_df[actisteps_df["valid_day"]]
actisteps_agg = actisteps_df.groupby("SEQN").agg(
    mean_daily_steps=("daily_steps", "mean"),
    sd_daily_steps=("daily_steps", "std"),
).reset_index()
actisteps_agg["sd_daily_steps"] = actisteps_agg["sd_daily_steps"].fillna(0.0)

selected_columns = ["SEQN", "mean_daily_steps", "sd_daily_steps"]
df = actisteps_agg[selected_columns].copy()

# Compute Activity Counts Features
ac_minute_cols = [c for c in ac_df.columns if c.startswith("min_")]
ac_df["daily_AC"] = ac_df[ac_minute_cols].sum(axis=1, numeric_only=True)
ac_agg = ac_df.groupby("SEQN").agg(
    mean_daily_AC=("daily_AC", "mean"),
    sd_daily_AC=("daily_AC", "std")
).reset_index()
ac_agg["sd_daily_AC"] = ac_agg["sd_daily_AC"].fillna(0.0)

selected_columns = ["SEQN", "mean_daily_AC", "sd_daily_AC"]
ac_agg = ac_agg[selected_columns].copy()
df = df.merge(ac_agg, on="SEQN", how="left")

# Compute MIMS (Monitor-Independent Movement Summary) Features
mims_minute_cols = [c for c in mims_df.columns if c.startswith("min_")]
mims_df["daily_mims_sum"] = mims_df[mims_minute_cols].sum(axis=1, numeric_only=True)
mims_agg = mims_df.groupby("SEQN").agg(
    mean_daily_mims=("daily_mims_sum", "mean"),
).reset_index()
selected_columns = ["SEQN", "mean_daily_mims"]
mims_agg = mims_agg[selected_columns].copy()
df = df.merge(mims_agg, on="SEQN", how="left")

# Merge with Subject Info
print(subj_df.columns.tolist())
selected_columns = ["SEQN", "gender", "age_in_years_at_screening"]
subj_df = subj_df[selected_columns].copy()
subj_df['gender'] = subj_df['gender'].map({'Male': 0, 'Female': 1})
subj_df = subj_df.rename(columns={'age_in_years_at_screening': 'age'})
df = df.merge(subj_df, on="SEQN", how="left")

df.describe()

Computing Wearable Data Features...
['SEQN', 'data_release_cycle', 'gender', 'age_in_years_at_screening', 'full_sample_2_year_interview_weight', 'full_sample_2_year_mec_exam_weight', 'masked_variance_pseudo_psu', 'masked_variance_pseudo_stratum']


Unnamed: 0,SEQN,mean_daily_steps,sd_daily_steps,mean_daily_AC,sd_daily_AC,mean_daily_mims,gender,age
count,14685.0,14685.0,14685.0,14685.0,14685.0,14685.0,14685.0,14685.0
mean,73183.777528,9696.561159,4827.944617,2151068.0,1063271.0,11352.391422,0.510861,35.753899
std,6486.391406,3703.772504,1902.143937,883331.1,443527.9,4329.524632,0.499899,23.184655
min,62161.0,1.5,0.0,125.6192,0.0,1.344667,0.0,3.0
25%,67313.0,7313.111111,3593.25027,1571732.0,757507.7,8623.339,0.0,14.0
50%,74099.0,9805.111111,4756.055824,2129310.0,1028943.0,11335.417778,1.0,33.0
75%,78971.0,12158.666667,5954.645665,2734121.0,1333755.0,14213.382,1.0,55.0
max,83731.0,29042.222222,18750.860952,7206275.0,7410255.0,35693.532444,1.0,80.0


In [8]:
# 4 - Load NHANES Blood Pressure Questionnaire Data + Merge Features

bpq_2011 = pd.read_sas(bpq_path_2011, format="xport")
bpq_2013 = pd.read_sas(bpq_path_2013, format="xport")

bpq = pd.concat([bpq_2011, bpq_2013], ignore_index=True)
# BPQ020: Ever told you had high blood pressure
# BPQ080: Doctor told you - high cholesterol level
selected_columns = ["SEQN", "BPQ020", "BPQ080"]
bpq = bpq[selected_columns].copy()
bpq = bpq.dropna()
df = df.merge(bpq, on="SEQN", how="inner")

df.describe()

Unnamed: 0,SEQN,mean_daily_steps,sd_daily_steps,mean_daily_AC,sd_daily_AC,mean_daily_mims,gender,age,BPQ020,BPQ080
count,10089.0,10089.0,10089.0,10089.0,10089.0,10089.0,10089.0,10089.0,10089.0,10089.0
mean,73319.499455,9298.063513,4623.720766,1955659.0,958097.7,10368.249539,0.520765,47.337595,1.652295,1.715135
std,6476.847021,3719.139896,1989.634256,771049.4,401109.8,3712.342248,0.499593,18.598677,0.526108,0.743949
min,62161.0,1.5,0.0,125.6192,0.0,1.344667,0.0,16.0,1.0,1.0
25%,67473.0,6844.111111,3286.032905,1470669.0,687540.6,8112.73,0.0,31.0,1.0,1.0
50%,74259.0,9184.444444,4441.714368,1945696.0,924700.5,10423.461333,1.0,47.0,2.0,2.0
75%,79132.0,11603.444444,5735.96443,2448192.0,1188918.0,12794.944111,1.0,62.0,2.0,2.0
max,83729.0,26553.222222,18750.860952,6911811.0,5098803.0,29617.966444,1.0,80.0,9.0,9.0


In [9]:
# 5 Load NHANES Laboratory Glycohemoglobin Data + Calculate Ground Truth Diabetes Binary
print("Loading Lab A1C Data...")

ghb_2011 = pd.read_sas("./data/nhanes-lab/ghb-2011-12.xpt", format="xport")
ghb_2013 = pd.read_sas("./data/nhanes-lab/ghb-2013-14.xpt", format="xport")

print("Lab Data Shape:")
print("- GHB 2011-2012:", ghb_2011.shape)
print("- GHB 2013-2014:", ghb_2013.shape)

print("Calculating ground truth with A1C lab data...")
ghb_2011 = ghb_2011[["SEQN", "LBXGH"]].rename(columns={"LBXGH": "a1c_2011"})
ghb_2013 = ghb_2013[["SEQN", "LBXGH"]].rename(columns={"LBXGH": "a1c_2013"})

# Combine 2011-12 and 2013-14 data
ghb = pd.concat([ghb_2011, ghb_2013], ignore_index=True)
ghb["a1c"] = ghb["a1c_2011"].combine_first(ghb["a1c_2013"])
ghb = ghb.dropna(subset=["a1c"])
ghb = ghb.drop_duplicates(subset=["SEQN"], keep="first")

# A1C Diabetes Criteria (We include prediabetes as diabetes):
#   normal < 5.7
#   prediabetes 5.7-6.4
#   diabetes >= 6.5
ghb["diabetes_binary"] = (ghb["a1c"] >= 5.7).astype(int)

selected_columns = ["SEQN", "diabetes_binary"]
ghb = ghb[selected_columns].copy()
df = df.merge(ghb, on="SEQN", how="inner")

df.describe()

Loading Lab A1C Data...
Lab Data Shape:
- GHB 2011-2012: (6549, 2)
- GHB 2013-2014: (6979, 2)
Calculating ground truth with A1C lab data...


Unnamed: 0,SEQN,mean_daily_steps,sd_daily_steps,mean_daily_AC,sd_daily_AC,mean_daily_mims,gender,age,BPQ020,BPQ080,diabetes_binary
count,9693.0,9693.0,9693.0,9693.0,9693.0,9693.0,9693.0,9693.0,9693.0,9693.0,9693.0
mean,73345.937687,9341.622032,4638.183336,1965061.0,961076.0,10415.181679,0.520891,47.39936,1.652017,1.712679,0.375116
std,6474.360186,3695.534337,1983.649429,765999.8,399448.1,3683.15822,0.499589,18.507968,0.528125,0.742608,0.484178
min,62161.0,3.0,0.0,186.2902,0.0,1.344667,0.0,16.0,1.0,1.0,0.0
25%,67492.0,6893.333333,3305.466193,1481845.0,691361.0,8173.797111,0.0,32.0,1.0,1.0,0.0
50%,74300.0,9222.125,4454.423105,1952477.0,926510.8,10459.773333,1.0,47.0,2.0,2.0,0.0
75%,79152.0,11621.111111,5741.10431,2451345.0,1189246.0,12816.167556,1.0,62.0,2.0,2.0,1.0
max,83729.0,26553.222222,18750.860952,6911811.0,5098803.0,29617.966444,1.0,80.0,9.0,9.0,1.0


In [11]:
from sklearn.model_selection import train_test_split

# 6 Split Data into Train/Val/Test:
print("Splitting training for train/val/test...")

X = df.drop(columns=["SEQN", "diabetes_binary"])
y = df["diabetes_binary"].astype(int)

X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, random_state=42, stratify=y_temp
)

print("Split sizes (train/val/test):", len(X_train), len(X_val), len(X_test))

Splitting training for train/val/test...
Split sizes (train/val/test): 6785 1454 1454


In [30]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score, make_scorer

# 6.5 - (Optional) Hyperparameter Tuning for Random Forest with GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000],
    'max_depth': [5, 10, 15, 20, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'class_weight': ['balanced']
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring=make_scorer(f1_score),
    cv=3,
    n_jobs=-1,
    verbose=2
)
grid_search.fit(X_train, y_train)

print("Best parameters found:")
print(grid_search.best_params_)
print(f"Best F1 score: {grid_search.best_score_:.4f}")

# Use best estimator to predict on validation and test sets
y_val_pred = grid_search.best_estimator_.predict(X_val)
y_test_pred = grid_search.best_estimator_.predict(X_test)

print("Validation F1:", f1_score(y_val, y_val_pred))
print("Test F1:", f1_score(y_test, y_test_pred))

# Best parameters found:
# {'class_weight': 'balanced', 'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 900}
# Best F1 score: 0.6544
# Validation F1: 0.6537867078825348
# Test F1: 0.6506211180124224


Fitting 3 folds for each of 450 candidates, totalling 1350 fits


Fitting 3 folds for each of 450 candidates, totalling 1350 fits


KeyboardInterrupt: 

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    f1_score, precision_score, recall_score,
    average_precision_score, confusion_matrix
)
import numpy as np

# 7. Train Random Forest Model

rf_model = RandomForestClassifier(n_estimators=900, class_weight="balanced", random_state=42, max_depth=5, min_samples_leaf=4, min_samples_split=2)
rf_model.fit(X_train, y_train)

# choose threshold by maximizing F1 on validation
y_val_prob = rf_model.predict_proba(X_val)[:, 1]
best_t, best_f1 = 0.0, -1.0
for t in np.linspace(0.05, 0.95, 200):
    preds = (y_val_prob >= t).astype(int)
    score = f1_score(y_val, preds)
    if score > best_f1:
        best_f1 = score
        best_t = t
print("Best validation threshold:", best_t, "F1:", best_f1)

y_test_prob = rf_model.predict_proba(X_test)[:, 1]
y_test_pred = (y_test_prob >= best_t).astype(int)

print("Test Precision:", precision_score(y_test, y_test_pred))
print("Test Recall:", recall_score(y_test, y_test_pred))
print("Test F1:", f1_score(y_test, y_test_pred))
print("Test PR AUC:", average_precision_score(y_test, y_test_prob))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


Best validation threshold: 0.5339195979899497 F1: 0.6634304207119741
Adjusted (recall-boosted) threshold: 0.4538316582914572
Test Precision: 0.5460048426150121
Test Recall: 0.8275229357798165
Test F1: 0.6579139314369074
Test PR AUC: 0.6005394568258001
Confusion Matrix:
 [[534 375]
 [ 94 451]]


In [32]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV

# 7.5 - (Optional) Hyperparameter Tuning for XGBoost with GridSearchCV

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 6, 9],
    'learning_rate': [0.01, 0.1, 0.2],
    'scale_pos_weight': [1, 2, 5]
}

xgb = XGBClassifier(random_state=42, eval_metric='logloss')
grid_search = GridSearchCV(xgb, param_grid, scoring='f1', cv=3, n_jobs=-1)
grid_search.fit(X_train, y_train)

print("Best parameters found:")
print(grid_search.best_params_)
print(f"Best F1 score: {grid_search.best_score_:.4f}")

# Use best estimator to predict on validation and test sets
y_val_pred = grid_search.best_estimator_.predict(X_val)
y_test_pred = grid_search.best_estimator_.predict(X_test)

print("Validation F1:", f1_score(y_val, y_val_pred))
print("Test F1:", f1_score(y_test, y_test_pred))

Best parameters found:
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 300, 'scale_pos_weight': 2}
Best F1 score: 0.6574
Validation F1: 0.6568483063328424
Test F1: 0.6563649742457689


In [50]:
# 8. Train XGBoost Model

from xgboost import XGBClassifier

xgb_model = XGBClassifier(
    n_estimators=300,
    max_depth=3,
    learning_rate=0.01,
    scale_pos_weight=2,
    random_state=42,
    eval_metric='logloss'
)
xgb_model.fit(X_train, y_train)

# Threshold tuning on validation set
y_val_prob_xgb = xgb_model.predict_proba(X_val)[:, 1]
best_t_xgb, best_f1_xgb = 0.0, -1.0
for t in np.linspace(0.05, 0.95, 200):
    preds = (y_val_prob_xgb >= t).astype(int)
    score = f1_score(y_val, preds)
    if score > best_f1_xgb:
        best_f1_xgb = score
        best_t_xgb = t
print("XGBoost - Best validation threshold:", best_t_xgb, "F1:", best_f1_xgb)

y_test_prob_xgb = xgb_model.predict_proba(X_test)[:, 1]
y_test_pred_xgb = (y_test_prob_xgb >= best_t_xgb).astype(int)

print("\n--- XGBoost Results ---")
print("Test Precision:", precision_score(y_test, y_test_pred_xgb))
print("Test Recall:", recall_score(y_test, y_test_pred_xgb))
print("Test F1:", f1_score(y_test, y_test_pred_xgb))
print("Test PR AUC:", average_precision_score(y_test, y_test_prob_xgb))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred_xgb))

XGBoost - Best validation threshold: 0.5791457286432161 F1: 0.6639871382636656

--- XGBoost Results ---
Test Precision: 0.5738636363636364
Test Recall: 0.7412844036697248
Test F1: 0.6469175340272217
Test PR AUC: 0.6106223888114422
Confusion Matrix:
 [[609 300]
 [141 404]]


In [39]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

# 8.5 - (Optional) Hyperparameter Tuning for MLP Neural Network with GridSearchCV

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

param_grid = {
    'hidden_layer_sizes': [(64,), (64, 32), (64, 32, 16), (128, 64), (128, 64, 32)],
    'activation': ['relu', 'tanh'],
    'learning_rate': ['constant', 'adaptive'],
    'learning_rate_init': [0.001, 0.01],
    'alpha': [0.0001, 0.001, 0.01],
}

# Use early_stopping in the base estimator to prevent overfitting
mlp = MLPClassifier(
    random_state=42,
    max_iter=500,
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10
)

grid_search = GridSearchCV(mlp, param_grid, scoring='f1', cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

print("Best parameters found:")
print(grid_search.best_params_)
print(f"Best CV F1 score: {grid_search.best_score_:.4f}")

# Evaluate on validation and test sets
y_val_pred = grid_search.best_estimator_.predict(X_val_scaled)
y_test_pred = grid_search.best_estimator_.predict(X_test_scaled)

print(f"\nValidation F1: {f1_score(y_val, y_val_pred):.4f}")
print(f"Test F1: {f1_score(y_test, y_test_pred):.4f}")

Best parameters found:
{'activation': 'tanh', 'alpha': 0.0001, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'constant', 'learning_rate_init': 0.01}
Best CV F1 score: 0.6374

Validation F1: 0.6332
Test F1: 0.6088


In [51]:
# 9. Train Neural Network (MLP)

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Neural networks benefit from feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Calculate class weights for the loss function
from sklearn.utils.class_weight import compute_sample_weight
sample_weights = compute_sample_weight('balanced', y_train)

mlp_model = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    activation='tanh',
    solver='adam',
    learning_rate='constant',
    learning_rate_init=0.01,
    max_iter=500,
    early_stopping=True,
    validation_fraction=0.1,
    random_state=42,
    verbose=False,
    alpha=0.0001,
)
mlp_model.fit(X_train_scaled, y_train)

# Threshold tuning on validation set
y_val_prob_mlp = mlp_model.predict_proba(X_val_scaled)[:, 1]
best_t_mlp, best_f1_mlp = 0.0, -1.0
for t in np.linspace(0.05, 0.95, 200):
    preds = (y_val_prob_mlp >= t).astype(int)
    score = f1_score(y_val, preds)
    if score > best_f1_mlp:
        best_f1_mlp = score
        best_t_mlp = t
print("MLP - Best validation threshold:", best_t_mlp, "F1:", best_f1_mlp)

y_test_prob_mlp = mlp_model.predict_proba(X_test_scaled)[:, 1]
y_test_pred_mlp = (y_test_prob_mlp >= best_t_mlp).astype(int)

print("\n--- Neural Network (MLP) Results ---")
print("Test Precision:", precision_score(y_test, y_test_pred_mlp))
print("Test Recall:", recall_score(y_test, y_test_pred_mlp))
print("Test F1:", f1_score(y_test, y_test_pred_mlp))
print("Test PR AUC:", average_precision_score(y_test, y_test_prob_mlp))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred_mlp))

MLP - Best validation threshold: 0.33492462311557786 F1: 0.6592095451155854

--- Neural Network (MLP) Results ---
Test Precision: 0.546583850931677
Test Recall: 0.8073394495412844
Test F1: 0.6518518518518519
Test PR AUC: 0.5939069176997727
Confusion Matrix:
 [[544 365]
 [105 440]]


In [52]:
# 10. Ensemble Methods - Combining RF, XGBoost, and MLP

# Get probability predictions from all models
probs_rf = rf_model.predict_proba(X_test)[:, 1]
probs_xgb = xgb_model.predict_proba(X_test)[:, 1]
probs_mlp = mlp_model.predict_proba(X_test_scaled)[:, 1]

# Get validation probabilities for threshold tuning
val_probs_rf = rf_model.predict_proba(X_val)[:, 1]
val_probs_xgb = xgb_model.predict_proba(X_val)[:, 1]
val_probs_mlp = mlp_model.predict_proba(X_val_scaled)[:, 1]

# Calculate Weights for Weighted Average based on validation PR-AUC
w_rf = average_precision_score(y_val, val_probs_rf)
w_xgb = average_precision_score(y_val, val_probs_xgb)
w_mlp = average_precision_score(y_val, val_probs_mlp)
total_w = w_rf + w_xgb + w_mlp
w_rf, w_xgb, w_mlp = w_rf/total_w, w_xgb/total_w, w_mlp/total_w
print(f"Learned weights - RF: {w_rf:.3f}, XGB: {w_xgb:.3f}, MLP: {w_mlp:.3f}")

ensemble_weighted_val = w_rf * val_probs_rf + w_xgb * val_probs_xgb + w_mlp * val_probs_mlp
ensemble_weighted_test = w_rf * probs_rf + w_xgb * probs_xgb + w_mlp * probs_mlp

# Function to find optimal threshold for target recall
def find_threshold_for_recall(y_true, y_prob, target_recall=0.95):
    best_t, best_f1 = 0.0, -1.0
    for t in np.linspace(0.01, 0.95, 300):
        preds = (y_prob >= t).astype(int)
        rec = recall_score(y_true, preds)
        if rec >= target_recall:
            f1 = f1_score(y_true, preds)
            if f1 > best_f1:
                best_f1 = f1
                best_t = t
    return best_t, best_f1


# Find threshold that achieves ~95% recall
results = []
best_t = 0.2 # Fallback
best_t, _ = find_threshold_for_recall(y_val, ensemble_weighted_val, target_recall=0.95)


y_pred = (ensemble_weighted_test >= best_t).astype(int)

prec = precision_score(y_test, y_pred)
rec = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
pr_auc = average_precision_score(y_test, ensemble_weighted_test)

print(f"\n--- Weighted Average ---")
print(f"Threshold: {best_t:.3f}")
print(f"Precision: {prec:.4f}")
print(f"Recall: {rec:.4f}")
print(f"F1: {f1:.4f}")
print(f"PR AUC: {pr_auc:.4f}")
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")


Learned weights - RF: 0.332, XGB: 0.336, MLP: 0.332

--- Weighted Average ---
Threshold: 0.199
Precision: 0.4617
Recall: 0.9633
F1: 0.6243
PR AUC: 0.6090
Confusion Matrix:
[[297 612]
 [ 20 525]]
