# Phase 3 Performance (Holdout Metrics)

This notebook computes Phase 3 stacking ensemble metrics on a 75/25 holdout split of the Phase 2 feature dataset. It also writes `data/phase3_predictions.csv` for audit/reproducibility and prints MAE, RMSE, R², and the share of holdout predictions within fixed dollar thresholds (\$0.01, \$1, \$5).

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from math import sqrt
from sklearn.ensemble import StackingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# Locate the Phase 2 feature CSV: try the path relative to the repo root
# first, then fall back to one directory up (kernel started in a subfolder).
data_path = Path("data/phase2_features_baseline_models.csv")
data_path = data_path if data_path.exists() else Path("../data/phase2_features_baseline_models.csv")
df = pd.read_csv(data_path)

# Model input columns, in the order they are passed to the estimators.
features = [
    "trip_duration_days", "miles_traveled", "total_receipts_amount",
    "cost_per_day", "cost_per_mile", "miles_per_day",
    "cost_ratio",
]

# Design matrix and regression target.
X = df.loc[:, features]
y = df.loc[:, "reimbursement"]

# Ordered 75/25 holdout: the first 75% of rows train, the remainder test.
# No shuffling is applied, so the partition matches prior runs row-for-row.
split = int(len(df) * 0.75)
X_train = X.iloc[:split]
X_test = X.iloc[split:]
y_train = y.iloc[:split]
y_test = y.iloc[split:]

# Phase 3 stacking ensemble recipe.
#
# The ensemble is built from scratch and fit on the training rows only, so the
# metrics below describe a model that has never seen the holdout rows.
# NOTE: this cell previously unpickled src/final_model.pkl into `model` and
# then immediately overwrote it with the estimator below. That load was dead
# code, made the cell fail when the pickle was missing, and unpickled a file
# for no benefit, so it has been removed. The reported numbers are unchanged
# because they always came from the freshly fit ensemble.
base_models = [
    ("decision_tree", DecisionTreeRegressor(random_state=42)),
    ("random_forest", RandomForestRegressor(n_estimators=200, random_state=42)),
    ("gradient_boosting", GradientBoostingRegressor(random_state=42)),
]
meta_model = LinearRegression()

# passthrough=True feeds the original features to the meta-learner alongside
# the base-model predictions. `cv` is left at its scikit-learn default.
model = StackingRegressor(estimators=base_models, final_estimator=meta_model, passthrough=True)
model.fit(X_train, y_train)

# Score the holdout rows and keep the per-row absolute error for the
# threshold rates and the audit file.
pred = model.predict(X_test)
abs_diff = np.abs(y_test.values - pred)

# Aggregate error metrics.
mae = mean_absolute_error(y_test, pred)
rmse = sqrt(mean_squared_error(y_test, pred))
r2 = r2_score(y_test, pred)

# Fraction of holdout rows whose absolute error is within each dollar threshold.
within_0_01 = np.mean(abs_diff <= 0.01)
within_1 = np.mean(abs_diff <= 1.0)
within_5 = np.mean(abs_diff <= 5.0)

# Emit the whole report in one call, one item per line.
print(
    "Phase 3 holdout metrics (75/25 split):",
    f"MAE:   {mae:.4f}",
    f"RMSE:  {rmse:.4f}",
    f"R^2:   {r2:.4f}",
    f"Exact (<= $0.01): {within_0_01:.4f}",
    f"Close (<= $1):    {within_1:.4f}",
    f"Within $5:        {within_5:.4f}",
    f"Test rows:        {len(y_test)}",
    sep="\n",
)

# Persist per-row holdout results next to the input CSV for audit/repro.
out_path = data_path.with_name("phase3_predictions.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)

prediction_columns = {
    "Actual": y_test.values,
    "Predicted": pred,
    "AbsDiff": abs_diff,
}
out = pd.DataFrame(prediction_columns)
out.to_csv(out_path, index=False)
print(f"Saved predictions to {out_path}")


Phase 3 holdout metrics (75/25 split):
MAE:   79.0586
RMSE:  114.2183
R^2:   0.9295
Exact (<= $0.01): 0.0000
Close (<= $1):    0.0040
Within $5:        0.0280
Test rows:        250
Saved predictions to ..\data\phase3_predictions.csv
