# Summary pipeline (multi-tenor arb outcomes)
This notebook runs the arb outcome event-study pipeline using real repo data only (no synthetic seeds).


In [None]:
from __future__ import annotations

import ast
import hashlib
import json
import re
import logging
import shutil
from datetime import datetime
from pathlib import Path
import sys, os
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.diagnostic import acorr_ljungbox

# Make the project sources importable and normalise the working directory.
# The relative "../src" entry is kept exactly as in the template.
sys.path.insert(2, "../src")
try:
    # When launched from inside a `src` directory, move one level up.
    _cwd_parts = os.getcwd().replace("\\", "/").split("/")
    if "src" in _cwd_parts:
        os.chdir(os.path.pardir)
except Exception:
    # Best effort only: never let path hygiene abort the notebook.
    pass

# --- Try to import project utilities; fall back to local minimal implementations if unavailable ---
try:
    from slr_bucket.econometrics.event_study import add_event_time, event_study_regression, jump_estimator
    from slr_bucket.io import build_data_catalog, load_any_table, resolve_dataset_path, as_daily_date, coerce_num
except Exception as _imp_exc:  # noqa: BLE001
    logging.getLogger(__name__).warning("slr_bucket imports unavailable (%s). Using local fallbacks.", _imp_exc)

    def as_daily_date(s):
        return pd.to_datetime(s, errors="coerce").dt.floor("D")

    def coerce_num(x):
        return pd.to_numeric(x, errors="coerce")

    def resolve_dataset_path(name: str, expected_dir: Path | None = None) -> Path:
        # Minimal: treat `name` as path if it exists; else look under expected_dir for common extensions.
        p = Path(name)
        if p.exists():
            return p
        if expected_dir is None:
            expected_dir = Path.cwd()
        for ext in (".csv", ".parquet"):
            cand = expected_dir / f"{name}{ext}"
            if cand.exists():
                return cand
        raise FileNotFoundError(f"Could not resolve dataset path for {name} under {expected_dir}")

    def load_any_table(path: Path | str) -> pd.DataFrame:
        path = Path(path)
        if path.suffix.lower() == ".csv":
            return pd.read_csv(path)
        if path.suffix.lower() == ".parquet":
            # Parquet requires pyarrow/fastparquet; raise a helpful error
            raise ImportError("Parquet read requires pyarrow/fastparquet in this environment.")
        raise ValueError(f"Unsupported file type: {path.suffix}")

    def build_data_catalog(root: Path) -> pd.DataFrame:
        # Lightweight catalog: list csv files under root
        rows=[]
        for p in root.rglob("*.csv"):
            try:
                df=pd.read_csv(p, nrows=2)
                cols=",".join(df.columns.astype(str))
                rows.append({"path": str(p), "layer": p.parent.name, "rows": None, "columns": cols,
                             "frequency": None, "date_min": None, "date_max": None, "key_columns": "date",
                             "join_hints": ""})
            except Exception:
                rows.append({"path": str(p), "layer": p.parent.name, "rows": None, "columns": None,
                             "frequency": None, "date_min": None, "date_max": None, "key_columns": "date",
                             "join_hints": ""})
        return pd.DataFrame(rows)

    def add_event_time(df: pd.DataFrame, event_date: str) -> pd.DataFrame:
        out=df.copy()
        t0=pd.Timestamp(event_date)
        out["event_time"]=(pd.to_datetime(out["date"]) - t0).dt.days
        return out

    def _nw_ols(y: pd.Series, X: pd.DataFrame, lags: int):
        Xc = sm.add_constant(X, has_constant="add")
        # Coerce to numeric to avoid object-dtype issues
        Xc = Xc.apply(pd.to_numeric, errors="coerce").astype(float)
        yv = pd.to_numeric(y, errors="coerce")
        reg = pd.concat([yv.rename("y"), Xc], axis=1).dropna()
        if reg.empty:
            raise ValueError("Empty regression sample after numeric coercion/dropna.")
        res = sm.OLS(reg["y"], reg.drop(columns=["y"]), missing="drop").fit()
        robust = res.get_robustcov_results(cov_type="HAC", maxlags=lags)
        return robust

    def jump_estimator(df: pd.DataFrame, y_col: str, event_date: str, window: int, controls: list[str], hac_lags: int):
        sub = add_event_time(df, event_date)
        sub = sub[sub["event_time"].between(-window, window)].copy()
        sub["post"] = (sub["event_time"] >= 0).astype(int)
        use_controls=[c for c in controls if c in sub.columns]
        cols=[y_col,"post",*use_controls]
        reg=sub[cols].dropna()
        if reg.empty:
            return (np.nan, np.nan, 0)
        X=reg[["post",*use_controls]]
        y=reg[y_col]
        robust=_nw_ols(y, X, lags=hac_lags)
        i=robust.model.exog_names.index("post")
        return (float(robust.params[i]), float(robust.bse[i]), int(robust.nobs))

    def event_study_regression(df: pd.DataFrame, y_col: str, event_date: str, bins: list[tuple[int,int]], controls: list[str], hac_lags: int):
        sub = add_event_time(df, event_date)
        sub = sub[sub["event_time"].between(min(a for a,b in bins), max(b for a,b in bins))].copy()
        # Assign bins
        def _assign_bin(k):
            for a,b in bins:
                if a<=k<=b: return f"bin_[{a},{b}]"
            return np.nan
        sub["bin"]=sub["event_time"].apply(_assign_bin)
        # Omit baseline bin if present
        baseline = "bin_[-20,-1]" if "bin_[-20,-1]" in sub["bin"].unique().tolist() else None
        dummies=pd.get_dummies(sub["bin"])
        if baseline and baseline in dummies.columns:
            dummies=dummies.drop(columns=[baseline])
        use_controls=[c for c in controls if c in sub.columns]
        reg=pd.concat([sub[[y_col]], dummies, sub[use_controls]], axis=1).dropna()
        if reg.empty or dummies.shape[1]==0:
            return pd.DataFrame()
        y=reg[y_col]
        X=reg.drop(columns=[y_col])
        robust=_nw_ols(y, X, lags=hac_lags)
        out=[]
        for term in dummies.columns:
            if term in robust.model.exog_names:
                j=robust.model.exog_names.index(term)
                est=float(robust.params[j]); se=float(robust.bse[j])
                out.append({"term": term, "estimate": est, "se": se,
                            "ci_low": est-1.96*se, "ci_high": est+1.96*se, "n": int(robust.nobs)})
        return pd.DataFrame(out)


def slugify(s: str) -> str:
    """Turn *s* into a filename-safe slug.

    Runs of non-alphanumeric characters collapse to a single underscore,
    leading/trailing underscores are stripped, and the result is cut to
    80 characters. An empty result yields ``"series"``.
    """
    slug = re.sub(r"[^A-Za-z0-9]+", "_", str(s)).strip("_")
    if not slug:
        return "series"
    return slug[:80]


In [None]:
# Run configuration plus a timestamped, config-hashed output directory.
from datetime import timezone  # cell-local: only needed for the UTC run stamp

CONFIG = {
    "outcomes_file": "data/series/equity_spot_spread_SPY.csv",
    "series_columns": ["spread_SPY_filtered"],
    "series_kind": "single",
    # En dash written as an escape so the label survives encoding round-trips
    # (the previous literal had degraded to mojibake).
    "series_label": "Equity spot\u2013futures (SPY) arbitrage",
    "events": ["2020-04-01", "2021-03-19", "2021-03-31"],
    "windows": [20, 60],
    "event_bins": [(-60, -41), (-40, -21), (-20, -1), (0, 0), (1, 20), (21, 40), (41, 60)],
    "total_controls": ["VIX", "HY_OAS", "BAA10Y", "issu_7_bil", "issu_14_bil", "issu_30_bil"],
    "direct_controls": ["VIX", "HY_OAS", "BAA10Y", "issu_7_bil", "issu_14_bil", "issu_30_bil", "SOFR", "spr_tgcr", "spr_effr"],
    "hac_lags": 5,
}
repo_root = Path.cwd().parent
# Short, stable fingerprint of the configuration used to label the run folder.
cfg_hash = hashlib.sha256(json.dumps(CONFIG, sort_keys=True).encode()).hexdigest()[:12]
# Timezone-aware replacement for the deprecated datetime.utcnow(); same UTC stamp format.
run_stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
run_dir = repo_root / "outputs" / "summary_pipeline" / f"{run_stamp}_{cfg_hash}"
for sub in ["figures", "tables", "data", "logs"]:
    (run_dir / sub).mkdir(parents=True, exist_ok=True)
latest_dir = repo_root / "outputs" / "summary_pipeline" / "latest"
# force=True replaces any handlers left over from a previous run of this cell.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(name)s - %(message)s",
    handlers=[logging.FileHandler(run_dir / "logs" / "pipeline.log"), logging.StreamHandler()],
    force=True,
)
logger = logging.getLogger("summary_pipeline_multi")
run_dir


## Data map for the new `/data` structure
Used in this run:
- Outcomes: `data/series/equity_spot_spread_SPY.csv` (column `spread_SPY_filtered`), as set by `CONFIG["outcomes_file"]` and `CONFIG["series_columns"]`.
- Controls (preferred): `data/intermediate/analysis_panel.csv` if all required columns are present.
- Controls (fallback):
  - `data/raw/event_inputs/controls_vix_creditspreads_fred.(parquet|csv)`
  - `data/raw/event_inputs/repo_rates_combined.(parquet|csv)` or `repo_rates_fred`
  - `data/raw/event_inputs/treasury_issuance_by_tenor_fiscaldata.(parquet|csv)`
- Mechanism proxies (optional):
  - `primary_dealer_stats_ofr_stfm_nypd_long`
  - `bank_exposure_y9c_agg_daily`


In [None]:
# Build a catalog of the repo's data files and persist it next to the run outputs.
catalog = build_data_catalog(repo_root / "data")
catalog.to_csv(run_dir / "data" / "data_catalog.csv", index=False)
# Parquet and markdown exports depend on optional packages; record the failure
# in a sidecar file instead of aborting the pipeline.
try:
    catalog.to_parquet(run_dir / "data" / "data_catalog.parquet", index=False)
except Exception as exc:  # noqa: BLE001
    (run_dir / "data" / "data_catalog.parquet.error.txt").write_text(str(exc), encoding="utf-8")
try:
    catalog.to_markdown(run_dir / "data" / "data_catalog.md", index=False)
except ImportError as exc:  # DataFrame.to_markdown needs the optional `tabulate` package
    (run_dir / "data" / "data_catalog.md.error.txt").write_text(str(exc), encoding="utf-8")
catalog.head(20)


In [None]:
# Read the configured outcome file and reshape it to a long panel
# with columns: date, series, y.
outcome_path = (repo_root / CONFIG["outcomes_file"]).resolve()
outcomes = pd.read_csv(outcome_path)
outcomes["date"] = pd.to_datetime(outcomes["date"], errors="coerce")

# Every configured series column must be present in the file.
_available_cols = set(outcomes.columns)
series_cols = [name for name in CONFIG["series_columns"] if name in _available_cols]
missing = sorted(set(CONFIG["series_columns"]).difference(series_cols))
if missing:
    raise ValueError(f"Missing expected series columns: {missing}")

_wide = outcomes[["date", *series_cols]]
long = _wide.melt(id_vars=["date"], var_name="series", value_name="y")
long["y"] = pd.to_numeric(long["y"], errors="coerce")

# Series labeling / ordering helper
def _series_key(s: str):
    s = str(s)
    # Try to extract tenor from patterns like *_2Y, *_5Y, arb_2, etc.
    m = re.search(r"(\d+)\s*Y\b", s)
    if m:
        return int(m.group(1))
    m = re.search(r"arb_(\d+)", s)
    if m:
        return int(m.group(1))
    return s  # fallback string

long["series_key"] = long["series"].apply(_series_key)
long = long.dropna(subset=["date", "y"])

# `_series_key` yields ints (tenors) or strings (names). Sorting a column that
# mixes both raises TypeError, so order through typed helper columns instead:
# numeric keys first (ascending), then string keys (alphabetical), then date.
_key_is_text = long["series_key"].map(lambda k: isinstance(k, str)).astype(bool)
long = (
    long.assign(
        _key_is_text=_key_is_text,
        _key_num=long["series_key"].where(~_key_is_text, 0),
        _key_text=long["series_key"].where(_key_is_text, ""),
    )
    .sort_values(["_key_is_text", "_key_num", "_key_text", "date"])
    .drop(columns=["_key_is_text", "_key_num", "_key_text"])
    .reset_index(drop=True)
)

arb_long = long[["date","series","y","series_key"]].copy()

# Unit sanity check. The diagnostic dict is the cell's last expression so the
# notebook actually displays it (mid-cell it was evaluated and discarded).
value_q = long["y"].abs().quantile([0.5, 0.9, 0.99]).to_dict()
unit_note = "Likely basis points" if value_q.get(0.5, 0) > 0.2 else "Likely decimal units"
{"outcome_path": str(outcome_path), "series_cols": series_cols, "value_q": value_q, "unit_note": unit_note}


In [None]:
# Controls: prefer the intermediate analysis_panel when it carries every
# required column; otherwise build the controls from the raw event inputs.
needed = set(CONFIG["direct_controls"])
# Reset on every execution so a `controls` left over from a previous run of
# this cell is never silently reused.
controls = None
try:
    p = resolve_dataset_path("analysis_panel", expected_dir=repo_root / "data" / "intermediate")
    panel = load_any_table(p)
    panel["date"] = pd.to_datetime(panel["date"], errors="coerce")
    missing_in_panel = sorted(needed - set(panel.columns))
    if not missing_in_panel:
        logger.info("Using controls from intermediate analysis_panel: %s", p)
        controls = panel[["date", *sorted(needed)]].copy()
    else:
        logger.warning("analysis_panel lacks %s, using raw fallback", missing_in_panel)
except Exception as exc:
    logger.warning("analysis_panel unavailable/invalid (%s), using raw fallback", exc)

# The raw inputs are only touched when the intermediate panel could not be
# used, so a valid panel no longer requires the raw files to exist.
if controls is None:
    raw_dir = repo_root / "data" / "raw" / "event_inputs"

    fred = load_any_table(resolve_dataset_path("controls_vix_creditspreads_fred", expected_dir=raw_dir))
    fred["date"] = pd.to_datetime(fred["date"], errors="coerce")
    fred["date"] = as_daily_date(fred["date"])

    try:
        repo = load_any_table(resolve_dataset_path("repo_rates_combined", expected_dir=raw_dir))
    except FileNotFoundError:
        repo = load_any_table(resolve_dataset_path("repo_rates_fred", expected_dir=raw_dir))
    repo["date"] = pd.to_datetime(repo["date"], errors="coerce")
    repo["date"] = as_daily_date(repo["date"])
    repo = repo.rename(columns={"TGCR":"tgcr", "EFFR":"effr"})

    # Coerce BEFORE aggregating: groupby(...).mean(numeric_only=True) silently
    # drops object-dtype columns, which would discard text-typed controls.
    for col in ["VIX","HY_OAS","BAA10Y","SOFR","spr_tgcr","spr_effr","tgcr","effr"]:
        if col in fred.columns:
            fred[col] = coerce_num(fred[col])
        if col in repo.columns:
            repo[col] = coerce_num(repo[col])

    # Derive the repo spreads over SOFR when the file does not provide them.
    if "spr_tgcr" not in repo.columns and {"SOFR","tgcr"}.issubset(repo.columns):
        repo["spr_tgcr"] = pd.to_numeric(repo["tgcr"], errors="coerce") - pd.to_numeric(repo["SOFR"], errors="coerce")
    if "spr_effr" not in repo.columns and {"SOFR","effr"}.issubset(repo.columns):
        repo["spr_effr"] = pd.to_numeric(repo["effr"], errors="coerce") - pd.to_numeric(repo["SOFR"], errors="coerce")

    issu = load_any_table(resolve_dataset_path("treasury_issuance_by_tenor_fiscaldata", expected_dir=raw_dir))
    issu["date"] = pd.to_datetime(issu.get("issue_date"), errors="coerce")
    issu["date"] = as_daily_date(issu["date"])
    issu["tenor_bucket"] = pd.to_numeric(issu["tenor_bucket"], errors="coerce")
    issu["issuance_amount"] = pd.to_numeric(issu["issuance_amount"], errors="coerce") / 1e9
    d = issu.pivot_table(index="date", columns="tenor_bucket", values="issuance_amount", aggfunc="sum").reset_index()

    # Robustly rename tenor-bucket columns to issu_*_bil (handles int/float/str column labels)
    tenor_names = {7.0: "issu_7_bil", 10.0: "issu_10_bil", 14.0: "issu_14_bil", 20.0: "issu_20_bil", 30.0: "issu_30_bil"}
    rename_map = {}
    for col in d.columns:
        if col == "date":
            continue
        try:
            v = float(col)
        except Exception:
            continue
        for tenor, new_name in tenor_names.items():
            if abs(v - tenor) < 1e-9:
                rename_map[col] = new_name
                break
    d = d.rename(columns=rename_map)

    # Ensure required issuance controls exist (zeros if not present in file)
    for c in ["issu_7_bil", "issu_14_bil", "issu_30_bil", "issu_10_bil", "issu_20_bil"]:
        if c not in d.columns:
            d[c] = 0.0

    # If 14y bucket absent, approximate as 10y+20y (as in prior logic).
    # Each leg is zero-filled first: the pivot leaves NaN on dates where only
    # one tenor was issued, and NaN + x would otherwise wipe out that issuance.
    if d["issu_14_bil"].fillna(0.0).abs().sum() == 0.0:
        d["issu_14_bil"] = d["issu_10_bil"].fillna(0.0) + d["issu_20_bil"].fillna(0.0)

    for c in ["issu_7_bil", "issu_14_bil", "issu_30_bil"]:
        d[c] = pd.to_numeric(d[c], errors="coerce").fillna(0.0)

    # Keep only the issuance controls used in the design
    d = d[["date", "issu_7_bil", "issu_14_bil", "issu_30_bil"]]

    # One row per date in every source before merging.
    fred = fred.groupby("date", as_index=False).mean(numeric_only=True)
    repo = repo.groupby("date", as_index=False).mean(numeric_only=True)
    d    = d.groupby("date", as_index=False).sum(numeric_only=True)   # issuance is additive

    controls = fred.merge(repo, on="date", how="outer").merge(d, on="date", how="outer").sort_values("date")
    # keep only needed controls (drop extras like tgcr/effr if not needed)
    keep = ["date"] + sorted(needed & set(controls.columns))
    controls = controls[keep].copy()
    logger.info("Built controls from raw sources. columns=%s", keep)
    unavailable = sorted(needed - set(keep))
    if unavailable:
        logger.warning("Controls not found in raw sources: %s", unavailable)
    


In [None]:
# Display the assembled controls table for a quick visual check.
controls

In [None]:
# Attach the controls to every outcome observation (left join keeps all outcomes).
panel_long = arb_long.merge(controls, on="date", how="left")
for ctrl in CONFIG["direct_controls"]:
    if ctrl not in panel_long.columns:
        continue
    panel_long[ctrl] = pd.to_numeric(panel_long[ctrl], errors="coerce")

# The full panel is written to disk; the in-memory panel is then cut to the
# 2019-2021 analysis sample (both end dates inclusive).
panel_long.to_csv(run_dir / "data" / "arb_panel_long.csv", index=False)
_in_sample = panel_long["date"].between("2019-01-01", "2021-12-31")
panel_long = panel_long[_in_sample]
panel_long.head()


In [None]:
# Layer 1A: summary stats by series and regime
# Regime bounds are inclusive on both ends; "post" is open-ended.
regimes = {
    "pre": (pd.Timestamp("2019-01-01"), pd.Timestamp("2020-03-31")),
    "relief": (pd.Timestamp("2020-04-01"), pd.Timestamp("2021-03-31")),
    "post": (pd.Timestamp("2021-04-01"), pd.Timestamp.max),
}
rows = []
for series, g in panel_long.groupby("series"):
    g = g.sort_values("date")
    for regime, (start, end) in regimes.items():
        sub = g[(g["date"] >= start) & (g["date"] <= end)][["date", "y"]].dropna()
        if sub.empty:
            continue
        # Ljung-Box is best effort: on failure the p-value stays NaN, but the
        # reason is logged rather than silently discarded.
        lb_p = np.nan
        try:
            lb = acorr_ljungbox(sub["y"], lags=[min(10, max(1, len(sub) // 5))], return_df=True)
            lb_p = float(lb["lb_pvalue"].iloc[0])
        except Exception as exc:  # noqa: BLE001
            logger.warning("Ljung-Box test failed for series=%s regime=%s (N=%d): %s", series, regime, len(sub), exc)
        rows.append({
            "series": str(series), "regime": regime,
            "sample_start": sub["date"].min(), "sample_end": sub["date"].max(), "N": int(sub.shape[0]),
            "mean": float(sub["y"].mean()), "std": float(sub["y"].std()),
            "p1": float(sub["y"].quantile(0.01)), "p5": float(sub["y"].quantile(0.05)),
            "p50": float(sub["y"].quantile(0.50)), "p95": float(sub["y"].quantile(0.95)), "p99": float(sub["y"].quantile(0.99)),
            "autocorr1": float(sub["y"].autocorr(1)), "ljungbox_pvalue": lb_p,
        })
summary_stats = pd.DataFrame(rows)
summary_stats.to_csv(run_dir / "tables" / "summary_stats.csv", index=False)
summary_stats.head()


In [None]:
# Layer 1B: jump regressions by series, once with the TOTAL control set and
# once with the DIRECT control set, for every event date and window length.
jump_rows = []
_jump_specs = (("TOTAL", CONFIG["total_controls"]), ("DIRECT", CONFIG["direct_controls"]))
for event in CONFIG["events"]:
    for window in CONFIG["windows"]:
        for series, g in panel_long.groupby("series"):
            for spec, controls_set in _jump_specs:
                est, se, n = jump_estimator(
                    g,
                    y_col="y",
                    event_date=event,
                    window=window,
                    controls=controls_set,
                    hac_lags=CONFIG["hac_lags"],
                )
                # 95% interval only when both the estimate and its SE exist.
                if pd.notna(est) and pd.notna(se):
                    _half_width = 1.96 * se
                    _ci_low, _ci_high = est - _half_width, est + _half_width
                else:
                    _ci_low = _ci_high = np.nan
                jump_rows.append({
                    "event": event,
                    "window": window,
                    "series": str(series),
                    "spec": spec,
                    "estimate": est,
                    "se": se,
                    "ci_low": _ci_low,
                    "ci_high": _ci_high,
                    "N": n,
                })
jump_results = pd.DataFrame(jump_rows)
jump_results.to_csv(run_dir / "tables" / "jump_results.csv", index=False)
jump_results.head()


In [None]:
# Layer 1C: binned event-study + plots
import matplotlib.pyplot as plt

def _bin_mid(term: str):
    m = re.search(r"\[\s*(-?\d+)\s*,\s*(-?\d+)\s*\]", str(term))
    if not m:
        return np.nan
    a,b=int(m.group(1)), int(m.group(2))
    return 0.5*(a+b)

# Run the binned event study for every event x series x control set, collect
# the coefficient tables and save one event-path figure per combination.
bin_rows = []
_es_specs = (("TOTAL", CONFIG["total_controls"]), ("DIRECT", CONFIG["direct_controls"]))
for event in CONFIG["events"]:
    for series, g in panel_long.groupby("series"):
        for spec, controls_set in _es_specs:
            es = event_study_regression(
                g,
                y_col="y",
                event_date=event,
                bins=CONFIG["event_bins"],
                controls=controls_set,
                hac_lags=CONFIG["hac_lags"],
            )
            if es.empty:
                continue
            # Tag the estimates with their identifying labels (on a copy).
            es = es.assign(event=event, series=str(series), spec=spec)
            es["bin_mid"] = es["term"].apply(_bin_mid)
            # enforce numeric for plotting
            for c in ["estimate","ci_low","ci_high","bin_mid"]:
                if c in es.columns:
                    es[c] = pd.to_numeric(es[c], errors="coerce")
            bin_rows.append(es)

            plot_df = es.sort_values("bin_mid").dropna(subset=["bin_mid","estimate","ci_low","ci_high"])
            if plot_df.empty:
                continue
            _x = plot_df["bin_mid"].to_numpy(float)
            _lower = plot_df["ci_low"].to_numpy(float)
            _upper = plot_df["ci_high"].to_numpy(float)
            fig, ax = plt.subplots(figsize=(8, 4))
            ax.plot(_x, plot_df["estimate"].to_numpy(float), marker="o")
            ax.fill_between(_x, _lower, _upper, alpha=0.2)
            # Reference lines: zero effect and the event date.
            ax.axhline(0, color="black", linewidth=1)
            ax.axvline(0, color="black", linewidth=1, linestyle="--")
            ax.set_xlabel("Event time (bin midpoint)")
            ax.set_title(f"{CONFIG['series_label']} | Event={event} series={series} spec={spec}")
            fig.tight_layout()
            fig.savefig(run_dir / "figures" / f"event_path_{slugify(CONFIG['series_label'])}_{slugify(series)}_{event}_{spec.lower()}.png", dpi=150)
            plt.close(fig)

if bin_rows:
    eventstudy_bins = pd.concat(bin_rows, ignore_index=True)
else:
    eventstudy_bins = pd.DataFrame()
eventstudy_bins.to_csv(run_dir / "tables" / "eventstudy_bins.csv", index=False)
eventstudy_bins.head()


In [None]:
# Layer 1D: pooled jump regression with series fixed effects, exported as a
# CSV of `post` coefficients and an HTML regression table (stargazer).
pooled_rows = []
models = []
_pooled_specs = (("TOTAL", CONFIG["total_controls"]), ("DIRECT", CONFIG["direct_controls"]))
for event in CONFIG["events"]:
    sub = add_event_time(panel_long, event)
    sub = sub[sub["event_time"].between(-60, 60)].copy()
    sub["post"] = (sub["event_time"] >= 0).astype(int)
    for spec, controls_set in _pooled_specs:
        use_controls = [c for c in controls_set if c in sub.columns]
        use_cols = ["y", "post", "series", *use_controls]
        reg = sub[use_cols].dropna().copy()
        if reg.empty:
            continue
        rhs = " + ".join(["post", "C(series)", *use_controls])

        # Fitting with cov_type="HAC" keeps the robust standard errors on `res`.
        res = ols(f"y ~ {rhs}", data=reg).fit(cov_type="HAC", cov_kwds={"maxlags": CONFIG["hac_lags"]})
        models.append(res)

        if "post" not in res.params.index:
            continue
        pooled_rows.append({
            "event": event,
            "spec": spec,
            "post": float(res.params["post"]),
            "se": float(res.bse["post"]),
            "N": int(res.nobs),
        })

pooled_results = pd.DataFrame(pooled_rows)
pooled_results.to_csv(run_dir / "tables" / "pooled_jump_results.csv", index=False)

html_path = run_dir / "tables" / "regression_table.html"
try:
    import pandas as pd  # ensure available
    import stargazer.stargazer as _st
    _st.pd = pd  # monkey-patch: stargazer sometimes references pd without importing
    from stargazer.stargazer import Stargazer
    if not models:
        html_path.write_text("<html><body><p>No pooled models available.</p></body></html>", encoding="utf-8")
    else:
        sg = Stargazer(models)
        sg.title(f"Pooled jump regressions (HAC SE) | {CONFIG['series_label']}")
        html_path.write_text(sg.render_html(), encoding="utf-8")
except Exception as exc:  # noqa: BLE001
    # Any stargazer problem downgrades the table to a short placeholder page.
    html_path.write_text(f"<html><body><p>Stargazer unavailable: {exc}</p></body></html>", encoding="utf-8")

pooled_results.head()


In [None]:
# Layer 2 mechanism (weekly), skip gracefully if required data missing.
# On any failure the reason is logged, stored in `layer2_note` and written to
# the output file in place of the coefficient table.
layer2_note = ""
try:
    raw_inputs = repo_root / "data" / "raw" / "event_inputs"
    pd_long = load_any_table(resolve_dataset_path("primary_dealer_stats_ofr_stfm_nypd_long", expected_dir=raw_inputs))
    bank = load_any_table(resolve_dataset_path("bank_exposure_y9c_agg_daily", expected_dir=raw_inputs))

    pd_long["date"] = pd.to_datetime(pd_long["date"], errors="coerce")
    bank["date"] = pd.to_datetime(bank["date"], errors="coerce")
    # Values read as text would break the mean aggregation below.
    pd_long["value"] = pd.to_numeric(pd_long["value"], errors="coerce")

    util_w = pd_long.pivot_table(index="date", columns="mnemonic", values="value", aggfunc="mean").resample("W-FRI").mean()
    util_w["utilization_index"] = util_w.sum(axis=1, min_count=1)
    util_w["utilization_lag1w"] = util_w["utilization_index"].shift(1)

    # Select the single needed column BEFORE resampling: averaging every bank
    # column raises on non-numeric columns in recent pandas and would skip the
    # whole layer. Rows with unparseable dates are dropped first.
    bank_w = (
        bank.dropna(subset=["date"])
        .set_index("date")[["agg_exempt_share"]]
        .apply(pd.to_numeric, errors="coerce")
        .resample("W-FRI")
        .mean()
    )

    # Weekly mean spread across series (keeps design consistent with template)
    y_w = panel_long.groupby([pd.Grouper(key="date", freq="W-FRI")])["y"].mean().to_frame("y")

    c_w = panel_long.set_index("date")[[c for c in CONFIG["direct_controls"] if c in panel_long.columns]].resample("W-FRI").mean()

    mech = y_w.join([bank_w, util_w[["utilization_lag1w"]], c_w], how="inner").dropna()
    mech["relief"] = ((mech.index >= "2020-04-01") & (mech.index <= "2021-03-31")).astype(int)
    # Standardise both mechanism proxies before interacting them with the relief dummy.
    mech["z_exempt"] = (mech["agg_exempt_share"] - mech["agg_exempt_share"].mean()) / mech["agg_exempt_share"].std()
    mech["z_util_l1"] = (mech["utilization_lag1w"] - mech["utilization_lag1w"].mean()) / mech["utilization_lag1w"].std()
    mech["relief_x_exempt"] = mech["relief"] * mech["z_exempt"]
    mech["relief_x_util"] = mech["relief"] * mech["z_util_l1"]

    rhs = "relief + relief_x_exempt + relief_x_util"
    # Union of TOTAL and DIRECT controls, first occurrence kept, limited to available columns.
    all_controls = dict.fromkeys(CONFIG["total_controls"] + CONFIG["direct_controls"])
    use_controls = [c for c in all_controls if c in mech.columns]
    if use_controls:
        rhs += " + " + " + ".join(use_controls)

    res = ols(f"y ~ {rhs}", data=mech).fit(cov_type="HAC", cov_kwds={"maxlags": 4})
    out = pd.DataFrame({"term": res.params.index, "coef": res.params.values, "se": res.bse.values})
    out.to_csv(run_dir / "tables" / "layer2_mechanism_weekly.csv", index=False)

except Exception as exc:  # noqa: BLE001
    layer2_note = f"Layer 2 skipped: {exc}"
    logger.warning("%s", layer2_note)
    (run_dir / "tables" / "layer2_mechanism_weekly.csv").write_text(layer2_note, encoding="utf-8")

layer2_note
