In [2]:
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
from diabetes_utils import plot_and_save_metrics

# Evaluate a simple-average ensemble of three models (TA-RNN, LSTM, XGBoost)
# from the label / probability arrays each model saved to disk.

# Labels saved alongside each model's predictions.
y_test_tarnn = np.load("y_test_tarnn.npy")
y_test_lstm  = np.load("y_test_LSTM.npy")
y_test_xgb   = np.load("y_test_xgb.npy")

# Predicted probabilities, flattened to 1-D so they can be averaged
# element-wise regardless of whether they were saved as (N,) or (N, 1).
p_tarnn = np.load("prob_tarnn.npy").ravel()
p_lstm  = np.load("probs_LSTM.npy").ravel()
p_xgb   = np.load("probs_xgb.npy").ravel()

# The average below is only meaningful if all three models were scored on the
# same labels in the same order. Raise explicitly rather than using `assert`,
# which is removed when Python runs with -O and would let misaligned data
# through silently.
for _model, _labels in (("LSTM", y_test_lstm), ("XGBoost", y_test_xgb)):
    if not np.array_equal(y_test_tarnn, _labels):
        raise ValueError(
            f"{_model} test labels differ from the TA-RNN test labels; "
            "the models were not evaluated on the same test set"
        )

y_test = y_test_tarnn

print("y_test shape:", y_test.shape)
print("TA-RNN probs shape:", p_tarnn.shape)
print("LSTM probs shape:", p_lstm.shape)
print("XGBoost probs shape:", p_xgb.shape)

# Each probability vector needs exactly one entry per label. Checked after the
# shape printout so the diagnostics above are visible when this fails; without
# it a length-1 vector would broadcast silently in the average.
for _model, _probs in (("TA-RNN", p_tarnn), ("LSTM", p_lstm), ("XGBoost", p_xgb)):
    if _probs.size != y_test.size:
        raise ValueError(
            f"{_model} produced {_probs.size} probabilities "
            f"for {y_test.size} test labels"
        )

# Simple average ensemble: unweighted mean of the three probability vectors,
# thresholded at 0.5 to obtain hard class predictions.
p_ens = (p_tarnn + p_lstm + p_xgb) / 3.0
y_pred = (p_ens >= 0.5).astype(int)

# accuracy and F1 are computed from the thresholded predictions; ROC AUC is
# computed from the averaged probabilities themselves. zero_division=0 makes
# f1_score return 0 when the score is otherwise undefined.
ens_results = {
    "accuracy": round(accuracy_score(y_test, y_pred), 3),
    "roc_auc": round(roc_auc_score(y_test, p_ens), 3),
    "f1_pos":  round(f1_score(y_test, y_pred, zero_division=0), 3),
}

print("\nEnsemble TA-RNN + LSTM + XGBoost")
for k, v in ens_results.items():
    print(f"  {k}: {v}")

# Save plots via the project helper imported from diabetes_utils.
# NOTE(review): what the helper writes and where is not visible here - confirm.
plot_and_save_metrics("ensemble_tarnn_lstm_xgb", y_test, p_ens)

y_test shape: (20354,)
TA-RNN probs shape: (20354,)
LSTM probs shape: (20354,)
XGBoost probs shape: (20354,)

Ensemble TA-RNN + LSTM + XGBoost
  accuracy: 0.888
  roc_auc: 0.69
  f1_pos: 0.059
