# Summary pipeline (multi-tenor arb outcomes)
This notebook runs the arb outcome event-study pipeline using real repo data only (no synthetic seeds).


In [1]:
from __future__ import annotations

import ast
import hashlib
import json
import logging
import shutil
from datetime import datetime
from pathlib import Path
import sys, os
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.diagnostic import acorr_ljungbox
sys.path.insert(2, "../src")
if 'src' in os.getcwd():
    os.chdir(os.path.pardir)
    print(os.getcwd())
else:
    print(os.getcwd())
from slr_bucket.econometrics.event_study import add_event_time, event_study_regression, jump_estimator
from slr_bucket.io import build_data_catalog, load_any_table, resolve_dataset_path, as_daily_date, coerce_num


c:\Users\Owner\Box\Winter26\slr_bucket\notebooks


In [2]:
CONFIG = {
    "outcomes_source": "tips_treasury_implied_rf_2010",
    "outcome_pattern": "arb_",
    "tenors_required": [2, 5, 10],
    "events": ["2020-04-01", "2021-03-19", "2021-03-31"],
    "windows": [20, 60],
    "event_bins": [(-60, -41), (-40, -21), (-20, -1), (0, 0), (1, 20), (21, 40), (41, 60)],
    "total_controls": ["VIX", "HY_OAS", "BAA10Y", "issu_7_bil", "issu_14_bil", "issu_30_bil"],
    "direct_controls": ["VIX", "HY_OAS", "BAA10Y", "issu_7_bil", "issu_14_bil", "issu_30_bil", "SOFR", "spr_tgcr", "spr_effr"],
    "hac_lags": 5,
}
repo_root = Path.cwd().parent
cfg_hash = hashlib.sha256(json.dumps(CONFIG, sort_keys=True).encode()).hexdigest()[:12]
run_stamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
run_dir = repo_root / "outputs" / "summary_pipeline" / f"{run_stamp}_{cfg_hash}"
for sub in ["figures", "tables", "data", "logs"]:
    (run_dir / sub).mkdir(parents=True, exist_ok=True)
latest_dir = repo_root / "outputs" / "summary_pipeline" / "latest"
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s - %(message)s", handlers=[logging.FileHandler(run_dir / "logs" / "pipeline.log"), logging.StreamHandler()], force=True)
logger = logging.getLogger("summary_pipeline_multi")
run_dir


  run_stamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")


WindowsPath('c:/Users/Owner/Box/Winter26/slr_bucket/outputs/summary_pipeline/20260227_055256_03366b29ad5f')

## Data map for the new `/data` structure
Used in this run:
- Outcomes: `data/series/tips_treasury_implied_rf_2010.(parquet|csv)` (`arb_*` only).
- Controls (preferred): `data/intermediate/analysis_panel.csv` if all required columns are present.
- Controls (fallback):
  - `data/raw/event_inputs/controls_vix_creditspreads_fred.(parquet|csv)`
  - `data/raw/event_inputs/repo_rates_combined.(parquet|csv)` or `repo_rates_fred`
  - `data/raw/event_inputs/treasury_issuance_by_tenor_fiscaldata.(parquet|csv)`
- Mechanism proxies (optional):
  - `primary_dealer_stats_ofr_stfm_nypd_long`
  - `bank_exposure_y9c_agg_daily`


In [3]:
catalog = build_data_catalog(repo_root / "data")
catalog.to_csv(run_dir / "data" / "data_catalog.csv", index=False)
catalog.to_parquet(run_dir / "data" / "data_catalog.parquet", index=False)
catalog.to_markdown(run_dir / "data" / "data_catalog.md", index=False)
catalog.head(20)


Unnamed: 0,path,layer,rows,columns,frequency,date_min,date_max,key_columns,join_hints
0,c:\Users\Owner\Box\Winter26\slr_bucket\data\in...,intermediate,5476,"date,spread_2y_bps,spread_5y_bps,spread_10y_bp...",daily,2010-01-04,2024-12-31,date,daily:date | keys:date | layer:intermediate
1,c:\Users\Owner\Box\Winter26\slr_bucket\data\in...,intermediate,420,"date,bid_ask_spread,pubout,n_issues",monthly,1980-01-31,2014-12-31,date,keys:date | layer:intermediate
2,c:\Users\Owner\Box\Winter26\slr_bucket\data\in...,intermediate,1209,"date,fed_assets",weekly,2002-12-18,2026-02-11,date,weekly:date | keys:date | layer:intermediate
3,c:\Users\Owner\Box\Winter26\slr_bucket\data\in...,intermediate,1209,"date,fed_treasury_holdings",weekly,2002-12-18,2026-02-11,date,weekly:date | keys:date | layer:intermediate
4,c:\Users\Owner\Box\Winter26\slr_bucket\data\in...,intermediate,751,"date,sofr,sofr_volume",daily,2019-01-02,2021-12-31,date,daily:date | keys:date | layer:intermediate
5,c:\Users\Owner\Box\Winter26\slr_bucket\data\in...,intermediate,3752,"date,spread_2y_bps,spread_5y_bps,spread_10y_bp...",daily,2010-01-04,2024-12-31,date,daily:date | keys:date | layer:intermediate
6,c:\Users\Owner\Box\Winter26\slr_bucket\data\ra...,raw,3955,"Date,AUD,CAD,CHF,EUR,GBP,JPY,NZD,SEK,USD",unknown,NaT,NaT,,layer:raw
7,c:\Users\Owner\Box\Winter26\slr_bucket\data\ra...,raw,3913,"('SPX Index', 'PX_LAST'),('SPX Index', 'IDX_ES...",unknown,NaT,NaT,,layer:raw
8,c:\Users\Owner\Box\Winter26\slr_bucket\data\ra...,raw,14,"report_date,total_assets,total_reserves,total_...",quarterly,NaT,NaT,report_date,quarterly:report_date | keys:report_date | lay...
9,c:\Users\Owner\Box\Winter26\slr_bucket\data\ra...,raw,14,"report_date,total_assets,total_reserves,total_...",quarterly,NaT,NaT,report_date,quarterly:report_date | keys:report_date | lay...


In [4]:
outcome_path = resolve_dataset_path(CONFIG["outcomes_source"], expected_dir=repo_root / "data" / "series")
outcomes = load_any_table(outcome_path)
outcomes["date"] = pd.to_datetime(outcomes["date"], errors="coerce")
arb_cols = sorted([c for c in outcomes.columns if c.startswith("arb_")], key=lambda x: int(x.split("_")[1]))
if not arb_cols:
    raise ValueError("No arb_* outcome columns found.")
arb_long = outcomes[["date", *arb_cols]].melt(id_vars=["date"], var_name="outcome", value_name="y")
arb_long["tenor"] = arb_long["outcome"].str.extract(r"arb_(\d+)").astype(float).astype("Int64")
arb_long["y"] = pd.to_numeric(arb_long["y"], errors="coerce")
arb_long = arb_long.dropna(subset=["date", "tenor", "y"]).sort_values(["tenor", "date"]).reset_index(drop=True)
value_q = arb_long["y"].abs().quantile([0.5, 0.9, 0.99]).to_dict()
unit_note = "Likely basis points" if value_q.get(0.5, 0) > 0.2 else "Likely decimal units"
{"outcome_path": str(outcome_path), "arb_cols": arb_cols, "value_q": value_q, "unit_note": unit_note}


{'outcome_path': 'c:\\Users\\Owner\\Box\\Winter26\\slr_bucket\\data\\series\\tips_treasury_implied_rf_2010.parquet',
 'arb_cols': ['arb_2', 'arb_5', 'arb_10', 'arb_20'],
 'value_q': {0.5: 22.171970346176465,
  0.9: 33.97164601022243,
  0.99: 40.397917461319494},
 'unit_note': 'Likely basis points'}

In [27]:
# Controls: prefer intermediate analysis_panel if valid, else fallback build from raw.
# def build_controls_panel():

needed = set(CONFIG["direct_controls"])
try:
    p = resolve_dataset_path("analysis_panel", expected_dir=repo_root / "data" / "intermediate")
    panel = load_any_table(p)
    panel["date"] = pd.to_datetime(panel["date"], errors="coerce")
    if needed.issubset(set(panel.columns)):
        logger.info("Using controls from intermediate analysis_panel: %s", p)
        controls =  panel[["date", *sorted(needed)]].copy() #, str(p)
except Exception as exc:
    logger.warning("analysis_panel unavailable/invalid (%s), using raw fallback", exc)

fred = load_any_table(resolve_dataset_path("controls_vix_creditspreads_fred", expected_dir=repo_root / "data" / "raw" / "event_inputs"))
fred["date"] = pd.to_datetime(fred["date"], errors="coerce")
fred["date"] = as_daily_date(fred["date"])
try:
    repo = load_any_table(resolve_dataset_path("repo_rates_combined", expected_dir=repo_root / "data" / "raw" / "event_inputs"))
except FileNotFoundError:
    repo = load_any_table(resolve_dataset_path("repo_rates_fred", expected_dir=repo_root / "data" / "raw" / "event_inputs"))
repo["date"] = pd.to_datetime(repo["date"], errors="coerce")
repo["date"] = as_daily_date(repo["date"])
repo = repo.rename(columns={"TGCR":"tgcr", "EFFR":"effr"})
if "spr_tgcr" not in repo.columns and {"SOFR","tgcr"}.issubset(repo.columns):
    repo["spr_tgcr"] = pd.to_numeric(repo["tgcr"], errors="coerce") - pd.to_numeric(repo["SOFR"], errors="coerce")
if "spr_effr" not in repo.columns and {"SOFR","effr"}.issubset(repo.columns):
    repo["spr_effr"] = pd.to_numeric(repo["effr"], errors="coerce") - pd.to_numeric(repo["SOFR"], errors="coerce")

issu = load_any_table(resolve_dataset_path("treasury_issuance_by_tenor_fiscaldata", expected_dir=repo_root / "data" / "raw" / "event_inputs"))
issu["date"] = pd.to_datetime(issu.get("issue_date"), errors="coerce")
issu["date"] = as_daily_date(issu["date"])
issu["tenor_bucket"] = pd.to_numeric(issu["tenor_bucket"], errors="coerce")
issu["issuance_amount"] = pd.to_numeric(issu["issuance_amount"], errors="coerce") / 1e9
d = issu.pivot_table(index="date", columns="tenor_bucket", values="issuance_amount", aggfunc="sum").reset_index()

# Robustly rename tenor-bucket columns to issu_*_bil (handles int/float/str column labels)
rename_map = {}
for col in d.columns:
    if col == "date":
        continue
    try:
        v = float(col)
    except Exception:
        continue
    if abs(v - 7.0) < 1e-9:
        rename_map[col] = "issu_7_bil"
    elif abs(v - 10.0) < 1e-9:
        rename_map[col] = "issu_10_bil"
    elif abs(v - 14.0) < 1e-9:
        rename_map[col] = "issu_14_bil"
    elif abs(v - 20.0) < 1e-9:
        rename_map[col] = "issu_20_bil"
    elif abs(v - 30.0) < 1e-9:
        rename_map[col] = "issu_30_bil"
d = d.rename(columns=rename_map)

# Ensure required issuance controls exist (zeros if not present in file)
for c in ["issu_7_bil", "issu_14_bil", "issu_30_bil", "issu_10_bil", "issu_20_bil"]:
    if c not in d.columns:
        d[c] = 0.0

# If 14y bucket absent, approximate as 10y+20y (as in prior logic)
if d["issu_14_bil"].fillna(0.0).abs().sum() == 0.0:
    d["issu_14_bil"] = d.get("issu_10_bil", 0.0) + d.get("issu_20_bil", 0.0)

for c in ["issu_7_bil", "issu_14_bil", "issu_30_bil"]:
    d[c] = pd.to_numeric(d[c], errors="coerce").fillna(0.0)

# Keep only the issuance controls used in the design
d = d[["date", "issu_7_bil", "issu_14_bil", "issu_30_bil"]]
fred = fred.groupby("date", as_index=False).mean(numeric_only=True)
repo = repo.groupby("date", as_index=False).mean(numeric_only=True)
d    = d.groupby("date", as_index=False).sum(numeric_only=True)   # issuance is additive

for col in ["VIX","HY_OAS","BAA10Y","SOFR","spr_tgcr","spr_effr","tgcr","effr"]:
    if col in fred.columns: fred[col] = coerce_num(fred[col])
    if col in repo.columns: repo[col] = coerce_num(repo[col])


# If 'controls' was not set from intermediate analysis_panel, build it from raw sources.
if "controls" not in globals():
    controls = fred.merge(repo, on="date", how="outer").merge(d, on="date", how="outer").sort_values("date")
    # keep only needed controls (drop extras like tgcr/effr if not needed)
    keep = ["date"] + sorted(set(CONFIG["direct_controls"]) & set(controls.columns))
    controls = controls[keep].copy()
    logger.info("Built controls from raw sources. columns=%s", keep)
    


In [41]:
controls

Unnamed: 0,date,outcome,y,tenor,BAA10Y,HY_OAS,SOFR,VIX,issu_14_bil,issu_30_bil,issu_7_bil,spr_tgcr
2216,2019-01-02,arb_2,19.777406,2,2.45,5.35,3.15,23.22,,,,-0.05
2217,2019-01-03,arb_2,19.971055,2,2.48,5.44,2.70,25.45,,,,0.00
2218,2019-01-04,arb_2,15.123262,2,2.45,5.05,2.45,21.38,,,,-0.02
2219,2019-01-07,arb_2,16.924870,2,2.42,4.83,2.41,21.40,,,,-0.03
2220,2019-01-08,arb_2,15.079427,2,2.39,4.65,2.42,20.47,,,,-0.02
...,...,...,...,...,...,...,...,...,...,...,...,...
14219,2021-12-27,arb_20,14.211492,20,1.86,3.02,0.05,17.68,,,,0.00
14220,2021-12-28,arb_20,12.289827,20,1.86,3.01,0.05,17.54,,,,0.00
14221,2021-12-29,arb_20,16.372281,20,1.85,3.03,0.05,16.95,,,,0.00
14222,2021-12-30,arb_20,17.382585,20,1.85,3.09,0.05,17.33,,,,0.00


In [29]:
panel_long = arb_long.merge(controls, on="date", how="left")
for c in CONFIG["direct_controls"]:
    if c in panel_long.columns:
        panel_long[c] = pd.to_numeric(panel_long[c], errors="coerce")
panel_long.to_parquet(run_dir / "data" / "arb_panel_long.parquet", index=False)
panel_long = panel_long[(panel_long["date"] >= "2019-01-01") & (panel_long["date"] <= "2021-12-31")]
panel_long.head()


Unnamed: 0,date,outcome_x,y_x,tenor_x,outcome_y,y_y,tenor_y,BAA10Y,HY_OAS,SOFR,VIX,issu_14_bil,issu_30_bil,issu_7_bil,spr_tgcr
2216,2019-01-02,arb_2,19.777406,2,arb_2,19.777406,2,2.45,5.35,3.15,23.22,,,,-0.05
2217,2019-01-02,arb_2,19.777406,2,arb_5,18.687345,5,2.45,5.35,3.15,23.22,,,,-0.05
2218,2019-01-02,arb_2,19.777406,2,arb_10,26.677623,10,2.45,5.35,3.15,23.22,,,,-0.05
2219,2019-01-02,arb_2,19.777406,2,arb_20,30.344502,20,2.45,5.35,3.15,23.22,,,,-0.05
2220,2019-01-03,arb_2,19.971055,2,arb_2,19.971055,2,2.48,5.44,2.7,25.45,,,,0.0


In [33]:
regimes = {
    "pre": (pd.Timestamp("2019-01-01"), pd.Timestamp("2020-03-31")),
    "relief": (pd.Timestamp("2020-04-01"), pd.Timestamp("2021-03-31")),
    "post": (pd.Timestamp("2021-04-01"), pd.Timestamp.max),
}
rows = []
for tenor, g in panel_long.groupby("tenor_x"):
    g = g.sort_values("date")
    for regime, (start, end) in regimes.items():
        sub = g[(g["date"] >= start) & (g["date"] <= end)][["date", "y_x"]].dropna()
        if sub.empty:
            continue
        lb_p = np.nan
        try:
            lb = acorr_ljungbox(sub["y_x"], lags=[min(10, max(1, len(sub) // 5))], return_df=True)
            lb_p = float(lb["lb_pvalue"].iloc[0])
        except Exception:  # noqa: BLE001
            pass
        rows.append({
            "tenor": int(tenor), "regime": regime,
            "sample_start": sub["date"].min(), "sample_end": sub["date"].max(), "N": int(sub.shape[0]),
            "mean": sub["y_x"].mean(), "std": sub["y_x"].std(),
            "p1": sub["y_x"].quantile(0.01), "p5": sub["y_x"].quantile(0.05), "p50": sub["y_x"].quantile(0.50), "p95": sub["y_x"].quantile(0.95), "p99": sub["y_x"].quantile(0.99),
            "autocorr1": sub["y_x"].autocorr(1), "ljungbox_pvalue": lb_p,
        })
summary_stats = pd.DataFrame(rows)
summary_stats.to_csv(run_dir / "tables" / "summary_stats.csv", index=False)
summary_stats.head()


Unnamed: 0,tenor,regime,sample_start,sample_end,N,mean,std,p1,p5,p50,p95,p99,autocorr1,ljungbox_pvalue
0,2,pre,2019-01-02,2020-03-31,1248,16.613717,16.156221,-12.840569,-9.772561,19.461887,36.86094,39.422457,0.994182,0.0
1,2,relief,2020-04-01,2021-03-31,1000,16.823866,15.999072,-23.646631,-17.151199,22.276635,33.751581,38.568851,0.994448,0.0
2,2,post,2021-04-01,2021-12-31,760,11.698582,12.706903,-12.503329,-11.103891,13.402866,27.843259,31.597212,0.992596,0.0
3,5,pre,2019-01-02,2020-03-31,1248,16.313539,6.093164,5.134608,6.436242,16.787261,24.287736,24.937615,0.982894,0.0
4,5,relief,2020-04-01,2021-03-31,1000,13.653405,7.635521,-5.231925,-2.220751,16.151514,22.393695,23.719841,0.996421,0.0


In [35]:
jump_rows = []
for event in CONFIG["events"]:
    for window in CONFIG["windows"]:
        for tenor, g in panel_long.groupby("tenor_x"):
            for spec, controls_set in [("TOTAL", CONFIG["total_controls"]), ("DIRECT", CONFIG["direct_controls"])]:
                est, se, n = jump_estimator(g, y_col="y_x", event_date=event, window=window, controls=controls_set, hac_lags=CONFIG["hac_lags"])
                jump_rows.append({
                    "event": event, "window": window, "tenor": int(tenor), "spec": spec,
                    "estimate": est, "se": se,
                    "ci_low": est - 1.96 * se if pd.notna(est) and pd.notna(se) else np.nan,
                    "ci_high": est + 1.96 * se if pd.notna(est) and pd.notna(se) else np.nan,
                    "N": n,
                })
jump_results = pd.DataFrame(jump_rows)
jump_results.to_csv(run_dir / "tables" / "jump_results.csv", index=False)
jump_results.head()


Unnamed: 0,event,window,tenor,spec,estimate,se,ci_low,ci_high,N
0,2020-04-01,20,2,TOTAL,-14.6982,4.019978e-13,-14.6982,-14.6982,20
1,2020-04-01,20,2,DIRECT,-14.561544,6.61889e-13,-14.561544,-14.561544,20
2,2020-04-01,20,5,TOTAL,4.039213,1.690949e-13,4.039213,4.039213,20
3,2020-04-01,20,5,DIRECT,-3.779389,6.720627e-14,-3.779389,-3.779389,20
4,2020-04-01,20,10,TOTAL,51.446176,4.051167e-13,51.446176,51.446176,20


In [37]:
import matplotlib.pyplot as plt

bin_rows = []
for event in CONFIG["events"]:
    for tenor, g in panel_long.groupby("tenor_x"):
        for spec, controls_set in [("TOTAL", CONFIG["total_controls"]), ("DIRECT", CONFIG["direct_controls"])]:
            es = event_study_regression(g, y_col="y_x", event_date=event, bins=CONFIG["event_bins"], controls=controls_set, hac_lags=CONFIG["hac_lags"])
            if es.empty:
                continue
            es["event"] = event
            es["tenor"] = int(tenor)
            es["spec"] = spec
            bin_rows.append(es)

            plot_df = es.sort_values("term")
            fig, ax = plt.subplots(figsize=(8, 4))
            ax.plot(plot_df["term"], plot_df["estimate"], marker="o")
            ax.fill_between(plot_df["term"], plot_df["ci_low"], plot_df["ci_high"], alpha=0.2)
            ax.axhline(0, color="black", linewidth=1)
            ax.tick_params(axis="x", rotation=45)
            ax.set_title(f"Event={event} tenor={int(tenor)} spec={spec}")
            fig.tight_layout()
            fig.savefig(run_dir / "figures" / f"event_path_arb_{int(tenor)}y_{event}_{spec.lower()}.png", dpi=150)
            plt.close(fig)

eventstudy_bins = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()
eventstudy_bins.to_csv(run_dir / "tables" / "eventstudy_bins.csv", index=False)
eventstudy_bins.head()


Unnamed: 0,term,estimate,se,ci_low,ci_high,n,event,tenor,spec
0,"bin_[-40,-21]",-8.964944,14.286887,-36.967243,19.037355,56,2020-04-01,2,TOTAL
1,"bin_[-60,-41]",-16.657376,20.953421,-57.726081,24.411329,56,2020-04-01,2,TOTAL
2,"bin_[1,20]",-24.859389,5.187344,-35.026585,-14.692194,56,2020-04-01,2,TOTAL
3,"bin_[21,40]",-13.676632,6.260949,-25.948091,-1.405172,56,2020-04-01,2,TOTAL
4,"bin_[41,60]",-2.714063,9.79805,-21.91824,16.490115,56,2020-04-01,2,TOTAL


In [51]:
# --- Pooled jump regressions + REQUIRED Stargazer patch ---

import pandas as pd
import numpy as np
from statsmodels.formula.api import ols

pooled_rows = []
models = []

for event in CONFIG["events"]:
    sub = add_event_time(panel_long, event)
    sub = sub[sub["event_time"].between(-60, 60)].copy()
    sub["post"] = (sub["event_time"] >= 0).astype(int)

    for spec, controls_set in [
        ("TOTAL", CONFIG["total_controls"]),
        ("DIRECT", CONFIG["direct_controls"]),
    ]:
        use_controls = [c for c in controls_set if c in sub.columns]
        use_cols = ["y_x", "post", "tenor_x", *use_controls]

        reg = sub[use_cols].dropna().copy()
        if reg.empty:
            continue

        # Patsy-safe dtypes
        for c in reg.columns:
            if str(reg[c].dtype) in ("Int64", "Int32", "Int16", "boolean"):
                reg[c] = reg[c].astype("float64")

        reg["tenor_x"] = reg["tenor_x"].astype("category")

        rhs = "post + C(tenor_x)"
        if use_controls:
            rhs += " + " + " + ".join(use_controls)

        res = ols(f"y_x ~ {rhs}", data=reg).fit()
        robust = res.get_robustcov_results(cov_type="HAC", maxlags=CONFIG["hac_lags"])

        models.append(robust)

        if "post" in robust.model.exog_names:
            i = robust.model.exog_names.index("post")
            pooled_rows.append({
                "event": event,
                "spec": spec,
                "post": float(robust.params[i]),
                "se": float(robust.bse[i]),
                "N": int(robust.nobs),
            })

pooled_df = pd.DataFrame(pooled_rows)
pooled_df.to_csv(run_dir / "tables" / "pooled_jump_results.csv", index=False)

# -------------------------------
# FIX STARGAZER BUGS: inject pd into all stargazer modules that use it
# -------------------------------
import pandas as pd
import numpy as np

import stargazer.stargazer as stz
import stargazer.translators.statsmodels as stz_sm

# Some stargazer versions forget to import these:
stz.pd = pd
stz.np = np
stz_sm.pd = pd
stz_sm.np = np

from stargazer.stargazer import Stargazer

html_path = run_dir / "tables" / "regression_table.html"
sg = Stargazer(models)
sg.title("Pooled Jump Regressions (HAC SE)")
sg.show_degrees_of_freedom(False)
html_path.write_text(sg.render_html(), encoding="utf-8")



6053

In [46]:
layer2_note = ""
try:
    pd_long = load_any_table(resolve_dataset_path("primary_dealer_stats_ofr_stfm_nypd_long", expected_dir=repo_root / "data" / "raw" / "event_inputs"))
    bank = load_any_table(resolve_dataset_path("bank_exposure_y9c_agg_daily", expected_dir=repo_root / "data" / "raw" / "event_inputs"))

    pd_long["date"] = pd.to_datetime(pd_long["date"], errors="coerce")
    bank["date"] = pd.to_datetime(bank["date"], errors="coerce")

    util_w = pd_long.pivot_table(index="date", columns="mnemonic", values="value", aggfunc="mean").resample("W-FRI").mean()
    util_w["utilization_index"] = util_w.sum(axis=1, min_count=1)
    util_w["utilization_lag1w"] = util_w["utilization_index"].shift(1)

    bank_w = bank.set_index("date").resample("W-FRI").mean()[["agg_exempt_share"]]
    y_w = panel_long.groupby([pd.Grouper(key="date", freq="W-FRI")])["y_x"].mean().to_frame("y_x")
    c_w = panel_long.set_index("date")[[c for c in CONFIG["direct_controls"] if c in panel_long.columns]].resample("W-FRI").mean()

    mech = y_w.join([bank_w, util_w[["utilization_lag1w"]], c_w], how="inner").dropna()
    mech["relief"] = ((mech.index >= "2020-04-01") & (mech.index <= "2021-03-31")).astype(int)
    mech["z_exempt"] = (mech["agg_exempt_share"] - mech["agg_exempt_share"].mean()) / mech["agg_exempt_share"].std()
    mech["z_util_l1"] = (mech["utilization_lag1w"] - mech["utilization_lag1w"].mean()) / mech["utilization_lag1w"].std()
    mech["relief_x_exempt"] = mech["relief"] * mech["z_exempt"]
    mech["relief_x_util"] = mech["relief"] * mech["z_util_l1"]

    xcols = ["relief", "relief_x_exempt", "relief_x_util", *[c for c in CONFIG["direct_controls"] if c in mech.columns]]
    reg = mech[["y_x", *xcols]].dropna()
    X = sm.add_constant(reg[xcols], has_constant="add")
    res = sm.OLS(reg["y_x"], X).fit(cov_type="HAC", cov_kwds={"maxlags": 2})
    pd.DataFrame({"term": res.params.index, "coef": res.params.values, "se": res.bse.values}).to_csv(run_dir / "tables" / "layer2_mechanism_weekly.csv", index=False)
    layer2_note = "Layer 2 executed."
except Exception as exc:  # noqa: BLE001
    layer2_note = f"Layer 2 skipped gracefully: {exc}"

layer2_note


'Layer 2 executed.'