In [13]:
# ===============================
# CASE 2 (ALL YEARS, FIXED): Temporal Exogenous
# Train: 2018-01-01 .. 2023-12-31
# Test : 2024-01-01 .. 2024-12-31
# ===============================

CSV_PATH = "AnE_Data_fin.csv"           # 2018–2024 raw CSV (12-hour clock)
XLSX_2024_PATH = "ANE_Data_2024.xlsx"   # trusted 2024 file
CAL_PATH = "calendar_2018_2024.csv"     # calendar with holiday info

HOURS = [24, 48, 72]
HORIZON_DAYS = [1, 2, 3]
m = 7  # weekly seasonality for daily data

# ---------- 1) ARRIVALS: 2018–2023 from CSV, 2024 from Excel ----------
def load_daily_from_csv(csv_path, start="2018-01-01", end="2023-12-31"):
    df = pd.read_csv(csv_path, low_memory=False)
    # robust parse for strings like "1/1/2018  12:13:00 am"
    # first try exact format; if it fails for some rows, fall back with errors='coerce'
    try:
        dt = pd.to_datetime(df["A&E Admit Date Time"],
                            format="%m/%d/%Y %I:%M:%S %p",
                            errors="coerce")
    except Exception:
        dt = pd.to_datetime(df["A&E Admit Date Time"], errors="coerce")
    df = df.assign(dt=dt).dropna(subset=["dt"])
    df = df[(df["dt"] >= pd.Timestamp(start)) & (df["dt"] <= pd.Timestamp(end))].copy()

    # daily counts on a full grid to avoid gaps
    full_index = pd.date_range(start, end, freq="D")
    daily_counts = df.groupby(df["dt"].dt.date).size()
    arrivals = daily_counts.reindex(full_index.date, fill_value=0).astype(float)
    return pd.Series(arrivals.values, index=full_index, name="Arrivals")

def load_daily_2024_from_excel(xlsx_path):
    raw = pd.read_excel(xlsx_path)
    raw["A&E Admit Date Time"] = pd.to_datetime(raw["A&E Admit Date Time"], errors="coerce")
    raw = raw.dropna(subset=["A&E Admit Date Time"])
    full_index = pd.date_range("2024-01-01", "2024-12-31", freq="D")
    daily_counts = raw.groupby(raw["A&E Admit Date Time"].dt.date).size()
    arrivals = daily_counts.reindex(full_index.date, fill_value=0).astype(float)
    return pd.Series(arrivals.values, index=full_index, name="Arrivals")

y_2018_2023 = load_daily_from_csv(CSV_PATH, "2018-01-01", "2023-12-31")
y_2024       = load_daily_2024_from_excel(XLSX_2024_PATH)

# concatenate into one continuous series 2018–2024
y_all = pd.concat([y_2018_2023, y_2024])
y_all.index.name = "Date"

df = y_all.to_frame()

# ---------- 2) TEMPORAL EXOG (DoW, Quarter, DoM_norm, Holiday) ----------
cal = pd.read_csv(CAL_PATH)

date_col_candidates = [c for c in cal.columns if c.lower() in ("date", "ds")]
if not date_col_candidates:
    raise ValueError("calendar_2018_2024.csv must have a 'Date' (or 'ds') column.")
cal = cal.rename(columns={date_col_candidates[0]: "Date"})
cal["Date"] = pd.to_datetime(cal["Date"], errors="coerce")
cal = cal.dropna(subset=["Date"]).sort_values("Date").set_index("Date")

# detect a holiday flag and normalize to 0/1
hol_cols = [c for c in cal.columns if ("holiday" in c.lower()) or ("public" in c.lower())]
if not hol_cols:
    cal["is_holiday"] = 0
    hol_col = "is_holiday"
else:
    hol_col = hol_cols[0]
    cal[hol_col] = (
        cal[hol_col].astype(str).str.strip().str.lower()
        .map({"1":1,"true":1,"yes":1,"y":1,"t":1,"holiday":1})
        .fillna(0).astype(int)
    )

full_index = pd.date_range("2018-01-01", "2024-12-31", freq="D")
cal_full = cal.reindex(full_index).copy()
cal_full.index.name = "Date"
cal_full[hol_col] = cal_full[hol_col].fillna(0).astype(int)

tmp = pd.DataFrame(index=full_index)
tmp["dow"] = tmp.index.dayofweek
tmp["dom"] = tmp.index.day
tmp["qtr"] = tmp.index.quarter
tmp["holiday"] = cal_full[hol_col].values
tmp["dom_norm"] = (tmp["dom"] - tmp["dom"].mean()) / (tmp["dom"].std() + 1e-8)

dow_dummies = pd.get_dummies(tmp["dow"], prefix="dow", drop_first=True)
qtr_dummies = pd.get_dummies(tmp["qtr"], prefix="qtr", drop_first=True)
exog_full = pd.concat([dow_dummies, qtr_dummies, tmp[["dom_norm", "holiday"]]], axis=1)

# numeric hardening; align to df
exog_full = exog_full.apply(pd.to_numeric, errors="coerce")
exog_full = exog_full.replace([np.inf, -np.inf], np.nan).fillna(0.0).astype("float64")
exog_full = exog_full.reindex(df.index)

# ---------- 3) TRAIN / TEST SPLIT ----------
train_end = pd.Timestamp("2023-12-31")
train = df.loc[:train_end]
test  = df.loc["2024-01-01":"2024-12-31"]

y_train = train["Arrivals"].values.astype(float)
y_test  = test["Arrivals"].values.astype(float)
train_exog = exog_full.loc[train.index]
test_exog  = exog_full.loc[test.index]
test_index = test.index

# ---------- 4) FIT SARIMAX (reuse params via filter) ----------
base_exog = SARIMAX(
    y_train,
    exog=train_exog.values,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, m),
    enforce_stationarity=False,
    enforce_invertibility=False
).fit(disp=False)
params_exog = base_exog.params

def forecast_with_filter_exog(history_y, history_exog, steps, future_exog):
    mod = SARIMAX(
        history_y,
        exog=history_exog,
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, m),
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    res_filt = mod.filter(params_exog)
    yhat = np.asarray(res_filt.get_forecast(steps=steps, exog=future_exog).predicted_mean, dtype=float)
    return np.abs(yhat)  # keep absolute as required

def da_window(y_true_vec, y_pred_vec):
    yt, yp = np.asarray(y_true_vec, float), np.asarray(y_pred_vec, float)
    if len(yt) < 2 or len(yp) < 2:
        return np.nan
    return (np.sign(np.diff(yt)) == np.sign(np.diff(yp))).mean() * 100.0

# ---------- 5) SLIDING-WINDOW EVAL ON 2024 (with 24h DA fix) ----------
results_exog = {h: {"rmse": [], "mae": [], "da": []} for h in HORIZON_DAYS}
t0 = time.time()

for h in HORIZON_DAYS:
    if h == 1:
        for i in range(0, len(y_test) - 1 + 1):
            hist_y  = np.r_[y_train, y_test[:i]]
            hist_ex = np.vstack([train_exog.values, test_exog.values[:i]]) if i > 0 else train_exog.values
            fut_ex  = test_exog.values[i:i+1]

            y_pred1 = forecast_with_filter_exog(hist_y, hist_ex, 1, fut_ex)[0]
            y_true1 = y_test[i]

            results_exog[h]["rmse"].append(np.sqrt(mean_squared_error([y_true1], [y_pred1])))
            results_exog[h]["mae"].append(mean_absolute_error([y_true1], [y_pred1]))

            if i >= 1:  # DA fix: compare to previous actual
                prev_actual = y_test[i-1]
                dir_true = np.sign(y_true1 - prev_actual)
                dir_pred = np.sign(y_pred1 - prev_actual)
                results_exog[h]["da"].append(100.0 if dir_true == dir_pred else 0.0)
            if i % 100 == 0: gc.collect()
    else:
        for i in range(0, len(y_test) - h + 1):
            hist_y  = np.r_[y_train, y_test[:i]]
            hist_ex = np.vstack([train_exog.values, test_exog.values[:i]]) if i > 0 else train_exog.values
            fut_ex  = test_exog.values[i:i+h]

            y_predh = forecast_with_filter_exog(hist_y, hist_ex, h, fut_ex)
            y_trueh = y_test[i:i+h]

            results_exog[h]["rmse"].append(np.sqrt(mean_squared_error(y_trueh, y_predh)))
            results_exog[h]["mae"].append(mean_absolute_error(y_trueh, y_predh))
            results_exog[h]["da"].append(da_window(y_trueh, y_predh))
            if i % 100 == 0: gc.collect()

elapsed = time.time() - t0
print("\nSARIMAX (Temporal Exogenous, Train 2018–2023 → Test 2024)")
for hrs, h in zip(HOURS, HORIZON_DAYS):
    out = {metric: (np.nan if len(results_exog[h][metric]) == 0 else np.nanmean(results_exog[h][metric]))
           for metric in results_exog[h]}
    print(f"{hrs}h (~{h} day) → RMSE: {out['rmse']:.2f} | MAE: {out['mae']:.2f} | DA: {out['da']:.2f}%")
print(f"\nRuntime: {elapsed:.2f} seconds")

# ---------- 6) MULTI-HORIZON PREDICTIONS TABLE (target-aligned, pre-fill & year-end safe) ----------
n_test = len(y_test)
preds_by_h = {h: np.full(n_test, np.nan, dtype=float) for h in HORIZON_DAYS}
max_h = max(HORIZON_DAYS)

# pre-fill first 1–2 targets using pre-2024 origins
for offset in range(1, max_h):
    origin_len = len(y_train) - offset
    if origin_len <= 0:
        continue
    hist_y  = y_train[:origin_len]
    hist_ex = train_exog.values[:origin_len]
    pre_hat = forecast_with_filter_exog(hist_y, hist_ex, max_h, test_exog.values[:max_h])
    for h in HORIZON_DAYS:
        target_idx = h - 1 - offset
        if 0 <= target_idx < n_test:
            preds_by_h[h][target_idx] = pre_hat[h - 1]

# rolling through 2024 — 
for i in range(n_test):
    hist_y  = np.r_[y_train, y_test[:i]]
    hist_ex = np.vstack([train_exog.values, test_exog.values[:i]]) if i > 0 else train_exog.values
    for h in HORIZON_DAYS:
        target_idx = i + h - 1
        if 0 <= target_idx < n_test:
            fut_ex = test_exog.values[i:i+h]
            yhat_h = forecast_with_filter_exog(hist_y, hist_ex, h, fut_ex)
            preds_by_h[h][target_idx] = yhat_h[h - 1]

out = pd.DataFrame({"Date": test_index, "Actual": y_test})
out["Predicted_1d"] = preds_by_h[1]
out["Predicted_2d"] = preds_by_h[2]
out["Predicted_3d"] = preds_by_h[3]
out["Error_1d"] = out["Actual"] - out["Predicted_1d"]
out["Error_2d"] = out["Actual"] - out["Predicted_2d"]
out["Error_3d"] = out["Actual"] - out["Predicted_3d"]

out.to_csv("Case2_Temporal_AllYears_MultiHorizon_2024.csv", index=False, encoding="utf-8-sig")
print("\nSaved: Case2_Temporal_AllYears_MultiHorizon_2024.csv")
print(out.head(10))
print(out.tail(10))


SARIMAX (Temporal Exogenous, Train 2018–2023 → Test 2024)
24h (~1 day) → RMSE: 38.72 | MAE: 38.72 | DA: 62.19%
48h (~2 day) → RMSE: 43.29 | MAE: 39.80 | DA: 62.19%
72h (~3 day) → RMSE: 44.83 | MAE: 39.95 | DA: 62.50%

Runtime: 663.58 seconds

Saved: Case2_Temporal_AllYears_MultiHorizon_2024.csv
        Date  Actual  Predicted_1d  Predicted_2d  Predicted_3d    Error_1d  \
0 2024-01-01   335.0      0.000003      0.000029      0.000030  334.999997   
1 2024-01-02   469.0    335.009369      0.000003      0.000003  133.990631   
2 2024-01-03   373.0    469.003748    335.009370      0.000003  -96.003748   
3 2024-01-04   356.0    372.997315    469.003748    335.009370  -16.997315   
4 2024-01-05   387.0    355.999525    372.997315    469.003748   31.000475   
5 2024-01-06   345.0    387.000867    355.999525    372.997315  -42.000867   
6 2024-01-07   319.0    344.998825    387.000867    355.999525  -25.998825   
7 2024-01-08   431.0    653.999206    679.998758    722.000800 -222.999206   
8

# <font color= maroon>  Base Model (prediction for 24, 48 and 72 hours) 2022 TO 2024

# <font color= maroon>  Model with Temporal Data (prediction for 24, 48 and 72 hours) 2022 TO 2024

In [6]:
# CASE 2 (Temporal Exogenous) — Train: 2022–2023, Test: 2024

import pandas as pd
import numpy as np
import time, gc, warnings
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tools.sm_exceptions import ConvergenceWarning
from sklearn.metrics import mean_squared_error, mean_absolute_error

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.simplefilter("ignore", ConvergenceWarning)

# Config
CSV_PATH = "AnE_Data_fin.csv"           # 2018–2024 raw CSV
CAL_PATH = "calendar_2018_2024.csv"     # calendar with holiday info

HOURS = [24, 48, 72]
HORIZON_DAYS = [1, 2, 3]
m = 7  # weekly seasonality (daily)

# Robust datetime parsing for CSV
def _try_parse(series, fmt=None, dayfirst=False):
    return pd.to_datetime(series, format=fmt, errors="coerce", dayfirst=dayfirst)

def _parse_admit_dt_multi(df):
    s = df["A&E Admit Date Time"]
    dt = pd.Series(pd.NaT, index=df.index, dtype="datetime64[ns]")

    # US 12h with/without seconds
    cand = _try_parse(s, "%m/%d/%Y %I:%M:%S %p", dayfirst=False); dt = dt.fillna(cand)
    cand = _try_parse(s, "%m/%d/%Y %I:%M %p",   dayfirst=False); dt = dt.fillna(cand)

    # DD/MM 24h with/without seconds
    cand = _try_parse(s, "%d/%m/%Y %H:%M:%S",   dayfirst=True);  dt = dt.fillna(cand)
    cand = _try_parse(s, "%d/%m/%Y %H:%M",      dayfirst=True);  dt = dt.fillna(cand)

    # Last-resort inference
    cand = pd.to_datetime(s, errors="coerce", dayfirst=True,  infer_datetime_format=True);  dt = dt.fillna(cand)
    cand = pd.to_datetime(s, errors="coerce", dayfirst=False, infer_datetime_format=True);  dt = dt.fillna(cand)

    # Fallback to date-only column (safe for daily aggregation)
    if "A&E Admit Date" in df.columns:
        d_only = pd.to_datetime(df["A&E Admit Date"], errors="coerce", dayfirst=True)
        dt = dt.where(dt.notna(), d_only)

    return dt

# ARRIVALS: 2022–2023 and 2024 from CSV 
def load_daily_from_csv_range(csv_path, start="2022-01-01", end="2023-12-31"):
    df = pd.read_csv(csv_path, low_memory=False)

    # Parse datetimes with the multi-format helper
    dt = _parse_admit_dt_multi(df)
    df = df.assign(dt=dt).dropna(subset=["dt"])

    # Normalize to midnight (Timestamp), avoid Python datetime.date
    df["day"] = pd.to_datetime(df["dt"], errors="coerce").dt.floor("D")
    df = df.dropna(subset=["day"])

    # Filter by requested range using the normalized day
    start_ts = pd.Timestamp(start)
    end_ts   = pd.Timestamp(end)
    df = df[(df["day"] >= start_ts) & (df["day"] <= end_ts)].copy()

    # Aggregate safely to daily counts
    daily_counts = df.groupby("day", sort=True).size()

    # Continuous daily index for the window (tz-naive)
    full_index = pd.date_range(start=start_ts, end=end_ts, freq="D")

    # Align & fill missing days with 0
    s = daily_counts.reindex(full_index, fill_value=0).astype(float)
    s.index.name = "Date"
    s.name = "Arrivals"

    if s.sum() == 0:
        raise ValueError(f"CSV arrivals sum to 0 in {start}..{end}. Parsing likely failed.")
    return s

y_2022_2023 = load_daily_from_csv_range(CSV_PATH, "2022-01-01", "2023-12-31")
y_2024       = load_daily_from_csv_range(CSV_PATH, "2024-01-01", "2024-12-31")

# Merge series
y_all = pd.concat([y_2022_2023, y_2024])
y_all.index.name = "Date"
df = y_all.to_frame()

# TEMPORAL EXOG
cal = pd.read_csv(CAL_PATH)

# accept 'Date' or 'ds'
date_col_candidates = [c for c in cal.columns if c.lower() in ("date", "ds")]
if not date_col_candidates:
    raise ValueError("calendar_2018_2024.csv must have a 'Date' (or 'ds') column.")
cal = cal.rename(columns={date_col_candidates[0]: "Date"})
cal["Date"] = pd.to_datetime(cal["Date"], errors="coerce")
cal = cal.dropna(subset=["Date"]).sort_values("Date").set_index("Date")

# use first holiday-like column if present, else create one
hol_cols = [c for c in cal.columns if ("holiday" in c.lower()) or ("public" in c.lower())]
if not hol_cols:
    cal["is_holiday"] = 0
    hol_col = "is_holiday"
else:
    hol_col = hol_cols[0]
    cal[hol_col] = (
        cal[hol_col]
        .astype(str).str.strip().str.lower()
        .map({"1":1,"true":1,"yes":1,"y":1,"t":1,"holiday":1})
        .fillna(0).astype(int)
    )

full_index = pd.date_range("2022-01-01", "2024-12-31", freq="D")
cal_full = cal.reindex(full_index).copy()
cal_full.index.name = "Date"
cal_full[hol_col] = cal_full[hol_col].fillna(0).astype(int)

tmp = pd.DataFrame(index=full_index)
tmp["dow"] = tmp.index.dayofweek          # 0=Mon
tmp["dom"] = tmp.index.day
tmp["qtr"] = tmp.index.quarter
tmp["holiday"] = cal_full[hol_col].values
tmp["dom_norm"] = (tmp["dom"] - tmp["dom"].mean()) / (tmp["dom"].std() + 1e-8)

dow_dummies = pd.get_dummies(tmp["dow"], prefix="dow", drop_first=True)
qtr_dummies = pd.get_dummies(tmp["qtr"], prefix="qtr", drop_first=True)

exog_full = pd.concat([dow_dummies, qtr_dummies, tmp[["dom_norm", "holiday"]]], axis=1)
exog_full = (
    exog_full.apply(pd.to_numeric, errors="coerce")
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0.0)
    .astype("float64")
)
exog_full = exog_full.reindex(df.index)

# TRAIN / TEST SPLIT 
train = df.loc["2022-01-01":"2023-12-31"]
test  = df.loc["2024-01-01":"2024-12-31"]

y_train = train["Arrivals"].values.astype(float)
y_test  = test["Arrivals"].values.astype(float)
train_exog = exog_full.loc[train.index]
test_exog  = exog_full.loc[test.index]
test_index = test.index

if np.all(y_train == 0) or np.all(y_test == 0):
    raise ValueError("Train or test arrivals are all zeros. Check inputs.")

# Utility metrics
def da_window(y_true_vec, y_pred_vec):
    yt, yp = np.asarray(y_true_vec, float), np.asarray(y_pred_vec, float)
    if len(yt) < 2 or len(yp) < 2:
        return np.nan
    return (np.sign(np.diff(yt)) == np.sign(np.diff(yp))).mean() * 100.0

# MAPE helper 
def mape_percent(y_true_vec, y_pred_vec):
    yt, yp = np.asarray(y_true_vec, float), np.asarray(y_pred_vec, float)
    mask = yt != 0
    if mask.sum() == 0:
        return np.nan
    return np.mean(np.abs((yt[mask] - yp[mask]) / yt[mask])) * 100.0

# Re-fitting forecaster 
def refit_and_forecast_exog(history_y, history_exog, steps, future_exog):

    model = SARIMAX(
        history_y,
        exog=history_exog,
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, m),
        enforce_stationarity=False,
        enforce_invertibility=False
    )
    res = model.fit(disp=False)
    yhat = np.asarray(res.get_forecast(steps=steps, exog=future_exog).predicted_mean, dtype=float)
    return np.clip(yhat, 0.0, None)

# EVAL 
results_exog = {h: {"rmse": [], "mae": [], "mape": [], "da": []} for h in HORIZON_DAYS}
t0 = time.time()

for h in HORIZON_DAYS:
    if h == 1:
        # 1-day: direction vs previous actual
        for i in range(0, len(y_test)):
            hist_y  = np.r_[y_train, y_test[:i]]
            hist_ex = (np.vstack([train_exog.values, test_exog.values[:i]]) if i > 0 else train_exog.values)
            fut_ex  = test_exog.values[i:i+1]

            y_pred1 = refit_and_forecast_exog(hist_y, hist_ex, 1, fut_ex)[0]
            y_true1 = y_test[i]

            results_exog[h]["rmse"].append(np.sqrt(mean_squared_error([y_true1], [y_pred1])))
            results_exog[h]["mae"].append(mean_absolute_error([y_true1], [y_pred1]))
            results_exog[h]["mape"].append(mape_percent([y_true1], [y_pred1]))
            if i >= 1:
                prev_actual = y_test[i-1]
                dir_true = np.sign(y_true1 - prev_actual)
                dir_pred = np.sign(y_pred1 - prev_actual)
                results_exog[h]["da"].append(100.0 if dir_true == dir_pred else 0.0)

            if i % 30 == 0: gc.collect()
    else:
        # 2d/3d: window DA over the h-length path
        for i in range(0, len(y_test) - h + 1):
            hist_y  = np.r_[y_train, y_test[:i]]
            hist_ex = (np.vstack([train_exog.values, test_exog.values[:i]]) if i > 0 else train_exog.values)
            fut_ex  = test_exog.values[i:i+h]

            y_predh = refit_and_forecast_exog(hist_y, hist_ex, h, fut_ex)
            y_trueh = y_test[i:i+h]

            results_exog[h]["rmse"].append(np.sqrt(mean_squared_error(y_trueh, y_predh)))
            results_exog[h]["mae"].append(mean_absolute_error(y_trueh, y_predh))
            results_exog[h]["mape"].append(mape_percent(y_trueh, y_predh))
            results_exog[h]["da"].append(da_window(y_trueh, y_predh))

            if i % 30 == 0: gc.collect()

elapsed = time.time() - t0
print("\nSARIMAX (Temporal Exogenous, REFIT each origin) — Train 2022–2023 → Test 2024")
for hrs, h in zip(HOURS, HORIZON_DAYS):
    out = {metric: (np.nan if len(results_exog[h][metric]) == 0 else np.nanmean(results_exog[h][metric]))
           for metric in results_exog[h]}
    print(f"{hrs}h (~{h} day) → RMSE {out['rmse']:.2f} | MAE {out['mae']:.2f} | MAPE {out['mape']:.2f}% | DA {out['da']:.2f}%")
print(f"\nRuntime: {elapsed:.2f} seconds")

# MULTI-HORIZON TABLE (target-aligned; pre-fill & year-end safe) 
n_test = len(y_test)
preds_by_h = {h: np.full(n_test, np.nan, dtype=float) for h in HORIZON_DAYS}
max_h = max(HORIZON_DAYS)

# Pre-fill first 1–2 targets using pre-2024 origins (refit once per offset)
for offset in range(1, max_h):  # 1 and 2
    origin_len = len(y_train) - offset
    if origin_len <= 0:
        continue
    hist_y  = y_train[:origin_len]
    hist_ex = train_exog.values[:origin_len]
    fut_ex_full = test_exog.values[:max_h]  # future exog slices for Jan 1..3
    pre_hat = refit_and_forecast_exog(hist_y, hist_ex, max_h, fut_ex_full)
    for h in HORIZON_DAYS:
        target_idx = h - 1 - offset
        if 0 <= target_idx < n_test:
            preds_by_h[h][target_idx] = pre_hat[h - 1]

# Rolling through 2024 — only fill valid targets (refit at each i)
for i in range(n_test):
    hist_y  = np.r_[y_train, y_test[:i]]
    hist_ex = (np.vstack([train_exog.values, test_exog.values[:i]]) if i > 0 else train_exog.values)
    for h in HORIZON_DAYS:
        target_idx = i + h - 1
        if 0 <= target_idx < n_test:
            fut_ex = test_exog.values[i:i+h]
            yhat_h = refit_and_forecast_exog(hist_y, hist_ex, h, fut_ex)
            preds_by_h[h][target_idx] = yhat_h[h - 1]

out = pd.DataFrame({"Date": test_index, "Actual": y_test})
out["Predicted_1d"] = preds_by_h[1]
out["Predicted_2d"] = preds_by_h[2]
out["Predicted_3d"] = preds_by_h[3]
out["Error_1d"] = out["Actual"] - out["Predicted_1d"]
out["Error_2d"] = out["Actual"] - out["Predicted_2d"]
out["Error_3d"] = out["Actual"] - out["Predicted_3d"]

out.to_csv("Case2_Temporal_MultiHorizon_Predictions_Daily_2022to2024_23.csv", index=False, encoding="utf-8-sig")
print("\nSaved: Case2_Temporal_MultiHorizon_Predictions_Daily_2022to2024_23.csv")
print(out.head(10))
print(out.tail(10))


SARIMAX (Temporal Exogenous, REFIT each origin) — Train 2022–2023 → Test 2024
24h (~1 day) → RMSE 19.59 | MAE 19.59 | MAPE 5.17% | DA 80.55%
48h (~2 day) → RMSE 21.82 | MAE 19.48 | MAPE 5.14% | DA 73.97%
72h (~3 day) → RMSE 22.61 | MAE 19.48 | MAPE 5.14% | DA 73.90%

Runtime: 8137.23 seconds

Saved: Case2_Temporal_MultiHorizon_Predictions_Daily_2022to2024_23.csv
        Date  Actual  Predicted_1d  Predicted_2d  Predicted_3d   Error_1d  \
0 2024-01-01   335.0    406.683057    451.306765    445.829353 -71.683057   
1 2024-01-02   469.0    377.671344    401.544759    407.659149  91.328656   
2 2024-01-03   373.0    393.293925    371.085240    390.742321 -20.293925   
3 2024-01-04   356.0    380.075178    382.662323    366.196275 -24.075178   
4 2024-01-05   387.0    368.890369    372.586829    373.761618  18.109631   
5 2024-01-06   345.0    343.570101    341.512010    344.193588   1.429899   
6 2024-01-07   319.0    346.347269    347.501683    346.808122 -27.347269   
7 2024-01-08   431