In [47]:
# Cell 1 ‚Äî Imports & global config
import os, json, math, warnings, itertools, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_squared_error

warnings.filterwarnings("ignore")
plt.rcParams["figure.dpi"] = 140

# Paths
DATA_PATH  = "data/housing_adequacy_dataset.csv"
CACHE_PATH = "best_params_arima_cache.json"

# Reproducibility
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

# Quarterly data
SEASONAL_PERIOD = 4

# Time split for holdout
CUTOFF_DATE = "2018-12-31"   # train ‚â§ cutoff, test > cutoff

# Rolling-CV controls (tuning)
INITIAL_CV_START = "2010-12-31"  # start CV later to ensure decent train size
ROLLING_STEP_FOR_TUNING = 1      # set 2 to halve folds
FOLD_LIMIT = 6                   # only last K folds; None = all
MIN_TRAIN_PER_FOLD = 16          # min non-NaN points required in a fold (quarters)
PRUNE_DURING_TUNING = True       # stop a candidate early if already worse than best

# Tuning grids (keep small; expand after pipeline works)
ARIMA_GRID = {"p":[0,1,2], "d":[0,1], "q":[0,1,2]}
SARIMA_GRID = {
    "p":[0,1,2], "d":[0,1], "q":[0,1,2],
    "P":[0,1],   "D":[0,1], "Q":[0,1], "s":[SEASONAL_PERIOD]
}


In [48]:
# Cell 2 ‚Äî Data prep (regular quarterly) & splits

def build_univariate_frame(df: pd.DataFrame):
    """
    Return a regular quarterly panel with columns: province, quarter (datetime), y (float).
    Reindexes each province to a full quarterly DatetimeIndex. Internal NaNs allowed.
    """
    df = df.sort_values(["province", "quarter"]).copy()
    df["quarter"] = pd.PeriodIndex(df["quarter"], freq="Q").to_timestamp()
    out = []
    for prov, g in df.groupby("province"):
        start = g["quarter"].min()
        end   = g["quarter"].max()
        idx = pd.date_range(start, end, freq="Q")
        y = (g.set_index("quarter")["dwelling_starts"]
               .astype(float)
               .reindex(idx))  # may introduce NaNs if gaps
        out.append(pd.DataFrame({"province":prov, "quarter":idx, "y":y}))
    uni = pd.concat(out, ignore_index=True)
    return uni

def chrono_split(df, cutoff=CUTOFF_DATE):
    cutoff = pd.Timestamp(cutoff)
    tr = df[df["quarter"] <= cutoff].copy()
    te = df[df["quarter"] >  cutoff].copy()
    return tr, te

def rolling_split(df, initial=None, step=1, fh=1):
    """
    Expanding-window CV over the full panel (all provinces).
    Yields (train_df, test_df) for the next fh quarters (panel slices).
    """
    df = df.copy()
    df["quarter"] = pd.to_datetime(df["quarter"])  # <- harden
    dates = pd.Index(df["quarter"].unique()).sort_values()  # robust, no mixed types

    # normalize 'initial' to Timestamp
    if initial is None:
        initial = dates[int(0.6 * len(dates))]
    else:
        initial = pd.Timestamp(initial)

    # use Index.get_indexer instead of np.searchsorted to avoid dtype issues
    start_idx = dates.get_indexer([initial], method="ffill")[0]
    start_idx = max(start_idx, 0)

    for i in range(start_idx, len(dates) - fh, step):
        train_end = dates[i]
        test_slice = dates[i+1 : i+1+fh]
        tr = df[df["quarter"] <= train_end].copy()
        te = df[df["quarter"].isin(test_slice)].copy()
        if not te.empty:
            yield tr, te



In [49]:
# Cell 3 ‚Äî Metrics & baselines

def metrics(y_true, y_pred):
    mae  = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    eps = 1e-8
    smape = 100 * np.mean(2 * np.abs(y_pred - y_true) / (np.abs(y_true) + np.abs(y_pred) + eps))
    return {"MAE": mae, "RMSE": rmse, "sMAPE": smape}

def mase(y_true: pd.Series, y_pred: pd.Series, y_train: pd.Series, season:int=1, eps:float=1e-12):
    if season == 1:
        denom = np.mean(np.abs(y_train.diff().dropna()))
    else:
        denom = np.mean(np.abs(y_train.diff(season).dropna()))
    denom = max(denom if denom is not None else np.nan, eps)
    return float(np.mean(np.abs(y_true - y_pred))) / denom


In [50]:
# Cell 4 ‚Äî ARIMA/SARIMA wrappers

def fit_sarimax_endog(y_tr: pd.Series, order=(1,1,1), seasonal_order=(0,0,0,0)):
    """
    y_tr: pandas Series indexed by a regular quarterly DatetimeIndex.
    missing='drop' allows internal NaNs; we also disable strict stationarity/invertibility.
    """
    model = SARIMAX(
        endog=y_tr,
        order=order,
        seasonal_order=seasonal_order,
        enforce_stationarity=False,
        enforce_invertibility=False,
        trend=None,
        missing="drop"   # <‚Äî key for robustness
    )
    res = model.fit(disp=False, method_kwargs={"warn_convergence": False})
    return res

def forecast_steps(res, steps=1):
    fc = res.get_forecast(steps=steps)
    return fc.predicted_mean


In [51]:
# Cell 5 ‚Äî Tuning utilities

def cartesian_product(grid_dict):
    keys = list(grid_dict.keys())
    for values in itertools.product(*[grid_dict[k] for k in keys]):
        yield dict(zip(keys, values))

def rolling_score_series(order, seasonal_order, series_df, fh=1, season_for_mase=1,
                         initial=INITIAL_CV_START, step=ROLLING_STEP_FOR_TUNING,
                         fold_limit=FOLD_LIMIT, min_train=MIN_TRAIN_PER_FOLD,
                         prune=PRUNE_DURING_TUNING, verbose=False):
    """
    Score a single province series across multiple folds.
    Returns average MASE (lower = better) or inf if no valid fold.
    """
    
    series_df = series_df.copy()
    series_df["quarter"] = pd.to_datetime(series_df["quarter"])
    
    folds = list(rolling_split(series_df, initial=initial, step=step, fh=fh))
    if fold_limit is not None and len(folds) > fold_limit:
        folds = folds[-fold_limit:]

    scores = []
    first_error = None
    for (tr_all, te_all) in folds:
        y_tr = tr_all["y"].astype(float)
        y_te = te_all["y"].astype(float)
        if y_tr.notna().sum() < min_train:
            continue
        try:
            res = fit_sarimax_endog(y_tr, order=order, seasonal_order=seasonal_order)
            y_hat = forecast_steps(res, steps=len(y_te)).values
            s = mase(y_te, pd.Series(y_hat, index=y_te.index), y_tr, season=season_for_mase)
            if np.isfinite(s):
                scores.append(s)
        except Exception as e:
            if first_error is None:
                first_error = f"{type(e).__name__}: {e}"

        # simple pruning: if growing mean already huge vs na√Øve, bail
        if prune and scores:
            cur = np.mean(scores)
            if cur > 5.0:  # heuristic threshold
                break

    if not scores:
        if verbose and first_error:
            print(f"   failed for order={order}, seas={seasonal_order} -> {first_error}")
        return np.inf
    return float(np.mean(scores))

def tune_one_series(series_df, model_type="arima", fh=1, verbose=False):
    """
    Returns (best_order, best_seasonal_order, best_score).
    For ARIMA: seasonal_order=(0,0,0,0).
    For SARIMA: seasonal_order from SARIMA_GRID.
    """
    if model_type == "arima":
        best = (np.inf, (0,0,0), (0,0,0,0))
        for g in cartesian_product(ARIMA_GRID):
            order = (g["p"], g["d"], g["q"])
            seas  = (0,0,0,0)
            score = rolling_score_series(order, seas, series_df, fh=fh, season_for_mase=1, verbose=verbose)
            if verbose: print(f"ARIMA{order} -> {score:.3f}")
            if score < best[0]:
                best = (score, order, seas)
        return best[1], best[2], best[0]

    elif model_type == "sarima":
        best = (np.inf, (0,0,0), (0,0,0,SEASONAL_PERIOD))
        for g in cartesian_product(SARIMA_GRID):
            order = (g["p"], g["d"], g["q"])
            seas  = (g["P"], g["D"], g["Q"], g["s"])
            score = rolling_score_series(order, seas, series_df, fh=fh, season_for_mase=SEASONAL_PERIOD, verbose=verbose)
            if verbose: print(f"SARIMA{order}x{seas} -> {score:.3f}")
            if score < best[0]:
                best = (score, order, seas)
        return best[1], best[2], best[0]

    else:
        raise ValueError("model_type must be 'arima' or 'sarima'")

def tune_all_provinces(train_df, models_to_run=("arima","sarima"), fh=1):
    """
    Returns dict: {(province, model_type): {"order":..., "seasonal_order":..., "score":...}}
    """
    best_params = {}
    provinces = list(train_df["province"].unique())
    total = len(provinces) * len(models_to_run)
    task = 0
    print(f"üßÆ Tuning {len(provinces)} provinces √ó {len(models_to_run)} models")
    for prov, gtr in train_df.groupby("province"):
        series = gtr[["quarter","y"]].reset_index(drop=True)
        for m in models_to_run:
            task += 1
            print(f"‚Üí [{task}/{total}] {prov.upper()} ‚Äî {m.upper()} ...", end=" ", flush=True)
            order, seas, score = tune_one_series(series, model_type=m, fh=fh, verbose=False)
            best_params[(prov, m)] = {"order": order, "seasonal_order": seas, "score": float(score)}
            print(f"best MASE={score:.3f}")
    print("‚úÖ Tuning done.")
    return best_params


In [52]:
# Cell 6 ‚Äî Cache best params (with force_retune + sanity check)
def load_or_tune_best_params(train_df, models_to_run=("arima","sarima"),
                             cache_path=CACHE_PATH, fh=1, force_retune=False,
                             min_success_ratio=0.6):
    def _tune():
        best = tune_all_provinces(train_df, models_to_run=models_to_run, fh=fh)
        serial = {"|".join(k): v for k, v in best.items()}
        with open(cache_path, "w") as f:
            json.dump(serial, f, indent=2)
        print(f"‚úÖ Saved tuned best parameters to {cache_path}")
        return best

    if force_retune or (not os.path.exists(cache_path)):
        print("‚è≥ Running tuning from scratch...")
        return _tune()

    print(f"üîÅ Loading cached best parameters from {cache_path}")
    with open(cache_path, "r") as f:
        best_params = json.load(f)
    best_params = {tuple(k.split("|")): v for k, v in best_params.items()}

    # If cache is mostly bad (inf), re-tune automatically
    scores = [v.get("score", np.inf) for v in best_params.values()]
    ok = np.isfinite(scores).sum() if len(scores) else 0
    ratio = ok / max(len(scores), 1)
    if ratio < min_success_ratio:
        print(f"‚ö†Ô∏è Cache quality low ({ok}/{len(scores)} finite). Retuning‚Ä¶")
        return _tune()

    return best_params


In [53]:
# Cell 7 ‚Äî Holdout predictions & overlays

def holdout_predictions(train_df, test_df, best_params, models_to_run=("arima","sarima")):
    rows = []

    # Baselines first
    for prov, gtr in train_df.groupby("province"):
        gte = test_df[test_df["province"] == prov]
        if gte.empty: 
            continue
        y_tr = gtr["y"].astype(float).copy()
        y_te = gte["y"].astype(float).copy()

        # Naive-1 (prev quarter)
        y_all = pd.concat([y_tr, y_te])
        yhat1 = y_all.shift(1).loc[y_te.index]
        met1 = metrics(y_te, yhat1)
        met1["MASE"] = mase(y_te, yhat1, y_tr, season=1)
        for q, yt, yp in zip(gte["quarter"].values, y_te.values, yhat1.values):
            rows.append({"province":prov, "model":"naive1", "quarter":pd.to_datetime(q), "y_true":yt, "y_pred":yp, **met1})

        # Naive-4 (seasonal)
        yhat4 = y_all.shift(SEASONAL_PERIOD).loc[y_te.index]
        met4 = metrics(y_te, yhat4)
        met4["MASE"] = mase(y_te, yhat4, y_tr, season=SEASONAL_PERIOD)
        for q, yt, yp in zip(gte["quarter"].values, y_te.values, yhat4.values):
            rows.append({"province":prov, "model":"naive4", "quarter":pd.to_datetime(q), "y_true":yt, "y_pred":yp, **met4})

    # ARIMA / SARIMA (tuned)
    for prov, gtr in train_df.groupby("province"):
        gte = test_df[test_df["province"] == prov]
        if gte.empty:
            continue
        y_tr = gtr["y"].astype(float)
        y_te = gte["y"].astype(float)

        for m in models_to_run:
            params = best_params.get((prov, m))
            if not params:
                continue
            order = tuple(params["order"])
            seas  = tuple(params["seasonal_order"])
            try:
                res = fit_sarimax_endog(y_tr, order=order, seasonal_order=seas)
                yhat = forecast_steps(res, steps=len(y_te)).values
                met = metrics(y_te, yhat)
                # Reasonable MASE baseline choices:
                season_for_mase = 1 if m == "arima" else SEASONAL_PERIOD
                met["MASE"] = mase(y_te, pd.Series(yhat, index=gte.index), y_tr, season=season_for_mase)
                for q, yt, yp in zip(gte["quarter"].values, y_te.values, yhat):
                    rows.append({"province":prov, "model":m, "quarter":pd.to_datetime(q), "y_true":yt, "y_pred":yp, **met})
            except Exception as e:
                # fallback (skip model for this province)
                print(f"‚ö†Ô∏è  {prov}-{m} failed on holdout with {e}")

    pred_df = pd.DataFrame(rows)
    return pred_df

def summarize_holdout(pred_df):
    by_model = pred_df.groupby("model")[["MAE","RMSE","sMAPE","MASE"]].mean().round(2).sort_values("MASE")
    print("=== Holdout averages across provinces ===")
    display(by_model)
    by_pm = pred_df.groupby(["province","model"])[["MAE","RMSE","sMAPE","MASE"]].mean().round(2)
    return by_model, by_pm

def plot_holdout_overlay(pred_df, models_to_plot=("naive1","naive4","arima","sarima")):
    provs = sorted(pred_df["province"].unique())
    cols = 4
    rows = math.ceil(len(provs) / cols)
    fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 2.6*rows), sharex=False, sharey=False)
    axes = axes.ravel()

    for i, prov in enumerate(provs):
        ax = axes[i]
        g = pred_df[pred_df["province"] == prov]
        truth = g.drop_duplicates("quarter")[["quarter","y_true"]].sort_values("quarter")
        ax.plot(truth["quarter"], truth["y_true"], color="black", linewidth=2.0, label="True")

        for m in models_to_plot:
            gm = g[g["model"] == m].sort_values("quarter")
            if gm.empty: 
                continue
            ax.plot(gm["quarter"], gm["y_pred"], "--", linewidth=1.5, label=m.upper())

        ax.set_title(prov.upper(), fontsize=10)
        ax.tick_params(axis="x", labelrotation=45)
        ax.grid(True, linestyle="--", alpha=0.3)

    for j in range(i+1, rows*cols):
        fig.delaxes(axes[j])
    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc="lower center", ncol=min(len(models_to_plot)+1, 6), frameon=False, bbox_to_anchor=(0.5, -0.02))
    fig.suptitle("True vs Predicted (Holdout) ‚Äî Na√Øve / ARIMA / SARIMA", y=1.02, fontsize=12)
    plt.tight_layout()
    plt.show()


In [54]:
# Cell 8 ‚Äî Rolling forecast evolution (visual)

def rolling_evolution_plot(df_all, best_params, model_name="sarima", fh=1, initial=INITIAL_CV_START):
    rows = []
    for tr, te in rolling_split(df_all, initial=initial, fh=fh):
        cutoff = tr["quarter"].max()
        for prov, gtr in tr.groupby("province"):
            gte = te[te["province"] == prov]
            if gte.empty:
                continue
            params = best_params.get((prov, model_name))
            if not params:
                continue
            order = tuple(params["order"])
            seas  = tuple(params["seasonal_order"])
            y_tr = gtr["y"].astype(float)
            if y_tr.notna().sum() < MIN_TRAIN_PER_FOLD:
                continue
            try:
                res = fit_sarimax_endog(y_tr, order=order, seasonal_order=seas)
                yhat = forecast_steps(res, steps=len(gte)).values
                rows.append(pd.DataFrame({
                    "cutoff": cutoff,
                    "province": prov,
                    "quarter": gte["quarter"].values,
                    "y_true": gte["y"].values,
                    "y_pred": yhat
                }))
            except Exception:
                pass

    if not rows:
        print("No rolling predictions to plot.")
        return

    df = pd.concat(rows, ignore_index=True)
    df["quarter"] = pd.to_datetime(df["quarter"])
    df["cutoff"]  = pd.to_datetime(df["cutoff"])
    df = df.sort_values(["province","cutoff","quarter"])

    cuts = sorted(df["cutoff"].unique())
    cut_rank = {c:i for i,c in enumerate(cuts)}
    df["cut_rank"] = df["cutoff"].map(cut_rank)

    provs = sorted(df["province"].unique())
    cols = 4
    rows = math.ceil(len(provs)/cols)
    fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 2.6*rows), sharex=False, sharey=False)
    axes = axes.ravel()

    for i, prov in enumerate(provs):
        ax = axes[i]
        g = df[df["province"] == prov]
        truth = g.drop_duplicates("quarter")[["quarter","y_true"]].sort_values("quarter")
        ax.plot(truth["quarter"], truth["y_true"], color="black", linewidth=1.8, label="True")
        for c, gc in g.groupby("cutoff"):
            r = cut_rank[c]
            alpha = 0.25 + 0.6 * (r / (len(cuts)-1 if len(cuts) > 1 else 1))
            ax.plot(gc["quarter"], gc["y_pred"], "--", color="tab:orange", alpha=alpha, linewidth=1.2)
        latest = g[g["cutoff"] == cuts[-1]].sort_values("quarter")
        if not latest.empty:
            ax.plot(latest["quarter"], latest["y_pred"], "--", color="tab:orange", linewidth=1.8, label="Latest")
        ax.set_title(prov.upper(), fontsize=10)
        ax.tick_params(axis="x", labelrotation=45)
        ax.grid(True, linestyle="--", alpha=0.3)

    for j in range(i+1, rows*cols):
        fig.delaxes(axes[j])

    handles, labels = axes[0].get_legend_handles_labels()
    fig.legend(handles[:2], labels[:2], loc="lower center", ncol=2, frameon=False, bbox_to_anchor=(0.5, -0.02))
    fig.suptitle(f"Rolling forecast evolution ‚Äî {model_name.upper()}", y=1.02, fontsize=12)
    plt.tight_layout()
    plt.show()


In [55]:
# Cell 9 ‚Äî Run all

# 1) Load & prep
raw = pd.read_csv(DATA_PATH)
uni_df = build_univariate_frame(raw)

# 2) Holdout split
train, test = chrono_split(uni_df, cutoff=CUTOFF_DATE)
print(f"Train: {train['quarter'].min().date()} ‚Üí {train['quarter'].max().date()} | "
      f"Test: {test['quarter'].min().date()} ‚Üí {test['quarter'].max().date()}")

# 3) Tune or load cache
MODELS = ("arima","sarima")
best_params = load_or_tune_best_params(train, MODELS, CACHE_PATH, fh=1, force_retune=True)

DEFAULTS = {
    "arima":  {"order": (1,1,1), "seasonal_order": (0,0,0,0)},
    "sarima": {"order": (0,1,1), "seasonal_order": (0,1,1,4)},
}

provinces = sorted(train["province"].unique())
for prov in provinces:
    for m in MODELS:
        v = best_params.get((prov, m))
        if (v is None) or (not np.isfinite(v.get("score", np.inf))):
            print(f"‚ÑπÔ∏è  Using default {m.upper()} for {prov}")
            best_params[(prov, m)] = {
                "order": DEFAULTS[m]["order"],
                "seasonal_order": DEFAULTS[m]["seasonal_order"],
                "score": float("nan"),
            }

# (Optional) drop hopeless results (score==inf) to avoid holdout failures
for k, v in list(best_params.items()):
    if not np.isfinite(v.get("score", np.inf)):
        print(f"‚ö†Ô∏è  Dropping {k} due to inf score during tuning.")
        del best_params[k]

# 4) Holdout predictions + summary
pred_holdout = holdout_predictions(train, test, best_params, models_to_run=MODELS)
by_model, by_pm = summarize_holdout(pred_holdout)

# 5) Overlays
plot_holdout_overlay(pred_holdout, models_to_plot=("naive1","naive4","arima","sarima"))

# 6) Rolling evolution (pick ARIMA or SARIMA)
rolling_evolution_plot(uni_df, best_params, model_name="sarima", fh=1, initial=INITIAL_CV_START)


Train: 1990-03-31 ‚Üí 2018-12-31 | Test: 2019-03-31 ‚Üí 2025-06-30
‚è≥ Running tuning from scratch...
üßÆ Tuning 11 provinces √ó 2 models
‚Üí [1/22] AB ‚Äî ARIMA ... best MASE=inf
‚Üí [2/22] AB ‚Äî SARIMA ... best MASE=inf
‚Üí [3/22] BC ‚Äî ARIMA ... best MASE=inf
‚Üí [4/22] BC ‚Äî SARIMA ... best MASE=inf
‚Üí [5/22] CAN ‚Äî ARIMA ... best MASE=inf
‚Üí [6/22] CAN ‚Äî SARIMA ... best MASE=inf
‚Üí [7/22] MB ‚Äî ARIMA ... best MASE=inf
‚Üí [8/22] MB ‚Äî SARIMA ... best MASE=inf
‚Üí [9/22] NB ‚Äî ARIMA ... best MASE=inf
‚Üí [10/22] NB ‚Äî SARIMA ... best MASE=inf
‚Üí [11/22] NL ‚Äî ARIMA ... best MASE=inf
‚Üí [12/22] NL ‚Äî SARIMA ... best MASE=inf
‚Üí [13/22] NS ‚Äî ARIMA ... best MASE=inf
‚Üí [14/22] NS ‚Äî SARIMA ... best MASE=inf
‚Üí [15/22] ON ‚Äî ARIMA ... best MASE=inf
‚Üí [16/22] ON ‚Äî SARIMA ... best MASE=inf
‚Üí [17/22] PE ‚Äî ARIMA ... best MASE=inf
‚Üí [18/22] PE ‚Äî SARIMA ... best MASE=inf
‚Üí [19/22] QC ‚Äî ARIMA ... best MASE=inf
‚Üí [20/22] QC ‚Äî SARIMA ... best MASE=in

ValueError: Input contains NaN.