## Model Loading from S3 and Evaluation

In [None]:
import joblib
import boto3
import os

# --- Download from S3 ---
# Fetch the pickled RandomForest and XGBoost artifacts to local temp files.
bucket = "hqpsusu-ml-data-bucket"
prefix = "final_project/models/"

s3 = boto3.client("s3")

local_rf  = "/tmp/rf_model.pkl"
local_xgb = "/tmp/xgb_model.pkl"

s3.download_file(bucket, prefix + "rf_model.pkl", local_rf)
s3.download_file(bucket, prefix + "xgb_model.pkl", local_xgb)

# --- Load models ---
# SECURITY NOTE: joblib.load unpickles the file, which can execute arbitrary
# code. Only load artifacts from a bucket whose contents you trust.
rf_loaded  = joblib.load(local_rf)
xgb_loaded = joblib.load(local_xgb)


# Optional: LSTM
# Best-effort load: any failure disables the LSTM branch (has_lstm = False)
# instead of aborting the run.
# NOTE(review): `mlflow` and `lstm_uri` are not defined in the visible part of
# this file; presumably they come from an earlier cell — confirm.
try:
    import torch
    lstm_loaded = mlflow.pytorch.load_model(lstm_uri)
    has_lstm = True
except Exception as exc:
    print("ℹ️ No LSTM found in S3 — skipping.")
    # Show the real cause so a missing import or undefined name is not
    # mistaken for a missing model artifact.
    print(f"   Reason: {type(exc).__name__}: {exc}")
    has_lstm = False

# =========================================================
# 3) Evaluation helper
# =========================================================
def eval_model(name, y_true, y_pred):
    """Compute regression metrics for one model's predictions.

    Both inputs are converted to flat float arrays and compared position by
    position. Pairs in which either the target or the prediction is NaN or
    infinite are dropped before any metric is computed.

    Args:
        name: Label stored under the "Model" key of the result.
        y_true: Array-like of ground-truth target values.
        y_pred: Array-like of predictions with the same number of elements
            as ``y_true``.

    Returns:
        dict with the scalar metrics "MAE", "RMSE", "Bias" (mean of
        prediction minus target), "sMAPE (%)", "≤5 min" and "≤10 min"
        (percentage of samples whose absolute error is at most 5 / 10) and
        "R²", plus the filtered arrays "Errors", "y_true" and "y_pred".
    """
    eps = 1e-6  # keeps the sMAPE denominator non-zero when both values are 0

    # Convert to plain arrays BEFORE building the mask: combining two pandas
    # objects with `&` aligns them by index label, not by position.
    yt_all = np.asarray(y_true, dtype=float).ravel()
    yp_all = np.asarray(y_pred, dtype=float).ravel()
    mask = np.isfinite(yt_all) & np.isfinite(yp_all)
    yt, yp = yt_all[mask], yp_all[mask]
    errors = yp - yt
    abs_err = np.abs(errors)

    return {
        "Model": name,
        "MAE": mean_absolute_error(yt, yp),
        # `squared=False` was deprecated in scikit-learn 1.4 and removed in
        # 1.6; taking the square root of the MSE works on every version.
        "RMSE": float(np.sqrt(mean_squared_error(yt, yp))),
        "Bias": np.mean(errors),
        "sMAPE (%)": 100.0 * np.mean(2.0 * abs_err / (np.abs(yt) + np.abs(yp) + eps)),
        "≤5 min": 100.0 * np.mean(abs_err <= 5),
        "≤10 min": 100.0 * np.mean(abs_err <= 10),
        "R²": r2_score(yt, yp),
        "Errors": errors,
        "y_true": yt,
        "y_pred": yp
    }

# =========================================================
# 4) Run predictions
# =========================================================
# One metrics dict (as returned by eval_model) per evaluated model.
results = []

# RF
y_pred_rf = rf_loaded.predict(X_test)
results.append(eval_model("RandomForest (S3)", y_test, y_pred_rf))

# XGB
y_pred_xgb = xgb_loaded.predict(X_test)
results.append(eval_model("XGBoost (S3)", y_test, y_pred_xgb))

# LSTM
if has_lstm:
    # Inference mode: eval() disables dropout / batch-norm updates so the
    # predictions are deterministic.
    lstm_loaded.eval()
    lstm_inputs = torch.tensor(X_test.values, dtype=torch.float32)
    # no_grad() skips building the autograd graph during the forward pass.
    with torch.no_grad():
        lstm_output = lstm_loaded(lstm_inputs)
    # .cpu() makes the NumPy conversion work even if the output is on a GPU.
    y_pred_lstm = lstm_output.detach().cpu().numpy().ravel()
    results.append(eval_model("LSTM (S3)", y_test, y_pred_lstm))

# =========================================================
# 5) Tabulate results
# =========================================================
# Drop the per-sample array entries so only the scalar metrics are printed.
df_results = pd.DataFrame(results).drop(columns=["Errors","y_true","y_pred"])
print(df_results)

# =========================================================
# 6) Visuals
# =========================================================
# Wide canvas holding one predicted-vs-actual panel per evaluated model.
plt.figure(figsize=(18, 6))

# (A) Predicted vs Actual
panel_count = len(results)
for panel_no, entry in enumerate(results, start=1):
    actual = entry["y_true"]
    predicted = entry["y_pred"]
    panel = plt.subplot(1, panel_count, panel_no)
    panel.scatter(actual, predicted, s=10, alpha=0.5)
    # Dashed red y = x reference line across the range of the actual values.
    lo = actual.min()
    hi = actual.max()
    panel.plot([lo, hi], [lo, hi], "r--")
    panel.set_title(f"{entry['Model']}\nMAE={entry['MAE']:.2f}, RMSE={entry['RMSE']:.2f}, R²={entry['R²']:.2f}")
    panel.set_xlabel("Actual RUL")
    panel.set_ylabel("Predicted RUL")

plt.tight_layout()
plt.show()

# (B) Error distributions
# Overlaid semi-transparent histograms of (prediction - actual), one per model.
plt.figure(figsize=(10, 6))
for entry in results:
    plt.hist(entry["Errors"], bins=60, alpha=0.5, label=entry["Model"])
# Vertical marker at zero error.
plt.axvline(0, color="k", linestyle="--")
plt.xlabel("Prediction Error (min)")
plt.ylabel("Count")
plt.title("Error Distributions")
plt.legend()
plt.show()
