# v8_summary_pipeline

Self-contained notebook to run the **two-layer econometric design**:

- **Layer 1 (Identification)**: daily event-window event studies (TOTAL vs DIRECT) with Newey–West HAC, plus jump estimates.
- **Layer 2 (Mechanism)**: weekly regressions of spread changes on `Relief`, `Relief×z(bank_exposure)`, `Relief×z(dealer_util_lag1w)` with HAC.

This notebook does **not** import any repo-specific modules.

## Expected inputs (edit in CELL 1)
You can point the notebook at **any** daily spread dataset. Minimum requirement:

- a `date` column, and
- one of:
  - **wide**: multiple spread columns (e.g., `arb_2`, `arb_5`, ...)
  - **long**: `date`, `series_id`, `value`

Optional daily controls can be:
- in the same file, or
- provided as separate files (repo rates, VIX/credit, issuance, etc.)

Optional mechanism inputs:
- `bank_exposure_y9c_agg_daily.csv` (or any daily bank exposure proxy)
- `primary_dealer_stats_ofr_stfm_nypd_long.csv` (weekly; used to build dealer utilization index)

## Outputs (written when you run the notebook)
All outputs go to:

- `../data/v8_summary/`
  - `layer1_eventstudy_coeffs.csv`
  - `layer1_jump_estimates.csv`
  - `layer2_weekly_panel.csv`
  - `layer2_mechanism_results.csv`
- `../data/v8_summary/figures/`
  - time-series plots
  - event-study coefficient plots
  - mechanism coefficient plot


In [1]:
# CELL 1 — Configuration (edit paths here)

from pathlib import Path
import pandas as pd
import numpy as np

# -----------------------
# Core user inputs
# -----------------------
# Daily spreads / wedge file (CSV or Parquet). Must include 'date' and either:
#  - long:  date, series_id, value
#  - wide:  date + multiple spread columns (you will select columns by patterns below)
DAILY_SPREADS_PATH = None  # e.g., Path("_data/tips_treasury_implied_rf_2010.parquet") or Path("event_study_start_v4_wide.csv")

# Optional: daily controls (if not inside DAILY_SPREADS_PATH)
REPO_RATES_PATH   = Path("../data/event_inputs/repo_rates_combined.csv")
RISK_CONTROLS_PATH = Path("../data/event_inputs/controls_vix_creditspreads_fred.csv")
ISSUANCE_PATH     = Path("../data/event_inputs/treasury_issuance_by_tenor_fiscaldata.csv")

# Optional mechanism inputs
BANK_EXPOSURE_DAILY_PATH = Path("../data/event_inputs/bank_exposure_y9c_agg_daily.csv")  # must have date + some exposure share column
PRIMARY_DEALER_LONG_PATH = Path("../data/event_inputs/primary_dealer_stats_ofr_stfm_nypd_long.csv")  # must have date,mnemonic,value

# If your files are somewhere else, set these paths explicitly.
# Example:
# DAILY_SPREADS_PATH = Path("/path/to/event_study_start_v4_wide.csv")

# -----------------------
# Column inference settings (wide daily file)
# -----------------------
# Patterns used to pick "spread" columns if your daily file is wide.
# Edit these if your names differ.
SPREAD_COL_PATTERNS = [
    r"\barb[_\-]?\d+\b",                 # arb_2, arb_5, ...
    r"\bwedge(_bps)?[_\-]?\d+\b",        # wedge_2, wedge_bps_2, ...
    r"\btips_treas[_\-]?\d+\b",          # tips_treas_2_rf, ...
]

# Controls (DIRECT spec) columns to use if available in merged daily panel
DIRECT_CONTROL_CANDS = [
    "SOFR",
    "spr_tgcr",        # TGCR–SOFR spread (preferred)
    "spr_effr",        # SOFR–EFFR or EFFR–SOFR depending on your construction
    "TGCR_minus_SOFR", # derived if TGCR and SOFR exist
    "BGCR_minus_SOFR", # derived if BGCR and SOFR exist
]

# -----------------------
# Event dates and windows
# -----------------------
EVENTS = [
    ("start_effective", pd.Timestamp("2020-04-01")),
    ("exit_announce",   pd.Timestamp("2021-03-19")),
    ("expiry",          pd.Timestamp("2021-03-31")),
]
WINDOWS = [20, 60]          # event window half-width W in days
OMIT_K  = -1                # omitted baseline day
NW_LAGS_DAILY = 10          # HAC lags for daily
NW_LAGS_WEEKLY = 4          # HAC lags for weekly

# Relief indicator window for mechanism layer
RELIEF_START = pd.Timestamp("2020-04-01")
RELIEF_END   = pd.Timestamp("2021-03-31")

# -----------------------
# Output directory
# -----------------------
OUT_DIR = Path("../outputs/v8_summary")
FIG_DIR = OUT_DIR / "figures"
OUT_DIR.mkdir(parents=True, exist_ok=True)
FIG_DIR.mkdir(parents=True, exist_ok=True)

print("OUT_DIR:", OUT_DIR.resolve())
print("FIG_DIR:", FIG_DIR.resolve())


OUT_DIR: C:\Users\Owner\Box\Winter26\slr_bucket\outputs\v8_summary
FIG_DIR: C:\Users\Owner\Box\Winter26\slr_bucket\outputs\v8_summary\figures


In [6]:
# CELL 2 — Imports + helper functions (self-contained)

import re
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import statsmodels.api as sm

warnings.filterwarnings("ignore", category=FutureWarning)

def _ensure_date(df, col="date"):
    if col not in df.columns:
        # attempt common variants
        for alt in ["Date", "DATE", "dt", "tradeDate", "tradedate"]:
            if alt in df.columns:
                df = df.rename(columns={alt: col})
                break
    if col not in df.columns:
        raise ValueError(f"Missing '{col}' column. cols={df.columns.tolist()}")
    out = df.copy()
    out[col] = pd.to_datetime(out[col])
    return out

def _read_any(path: Path):
    if path is None:
        return None
    path = Path(path)
    if not path.exists():
        return None
    if path.suffix.lower() == ".parquet":
        return pd.read_parquet(path)
    return pd.read_csv(path)

def _pick_cols_by_patterns(df, patterns):
    cols = df.columns.tolist()
    keep = set()
    for pat in patterns:
        rx = re.compile(pat, re.IGNORECASE)
        for c in cols:
            if rx.search(c):
                keep.add(c)
    return sorted(keep)

def _wide_to_long_spreads(df_wide, date_col="date", value_name="value"):
    # picks spread columns by patterns and melts
    spread_cols = _pick_cols_by_patterns(df_wide, SPREAD_COL_PATTERNS)
    if not spread_cols:
        raise ValueError(
            "Could not infer spread columns from wide daily file. "
            f"Try editing SPREAD_COL_PATTERNS. cols={df_wide.columns.tolist()}"
        )
    out = df_wide[[date_col] + spread_cols].melt(id_vars=[date_col], var_name="series_id", value_name=value_name)
    return out, spread_cols

def _standardize_z(x):
    x = pd.to_numeric(x, errors="coerce")
    return (x - x.mean()) / x.std(ddof=0)

def _event_dummies(dates: pd.Series, t0: pd.Timestamp, W: int, omit_k: int):
    k = (dates - t0).dt.days.astype(int)
    d = {}
    for kk in range(-W, W+1):
        if kk == omit_k:
            continue
        d[f"k_{kk:+d}"] = (k == kk).astype(int)
    D = pd.DataFrame(d)
    return D, k

def _nw_ols(y, X, lags: int):
    """
    OLS with Newey–West HAC, robust to object dtypes.
    Forces numeric, drops all-NaN and zero-variance columns, aligns y/X.
    """
    # y -> numeric
    y = pd.to_numeric(pd.Series(y), errors="coerce")

    # X -> numeric (coerce objects/bools)
    Xc = sm.add_constant(X, has_constant="add").copy()

    # Convert booleans explicitly (sometimes they stay object after concat)
    for c in Xc.columns:
        if Xc[c].dtype == bool:
            Xc[c] = Xc[c].astype(float)

    # Coerce everything else to numeric
    Xc = Xc.apply(lambda s: pd.to_numeric(s, errors="coerce"))

    # Drop columns that are entirely missing
    Xc = Xc.dropna(axis=1, how="all")

    # Drop columns with zero variance (can appear if a dummy is all zeros in window)
    nunique = Xc.nunique(dropna=True)
    Xc = Xc.loc[:, nunique > 1]

    # Align and drop rows with any missing in y or X
    df = pd.concat([y.rename("y"), Xc], axis=1)
    df = df.dropna(axis=0, how="any")

    y2 = df["y"].astype(float).values
    X2 = df.drop(columns=["y"]).astype(float).values

    res = sm.OLS(y2, X2).fit(cov_type="HAC", cov_kwds={"maxlags": lags})
    # Attach param names back for convenience
    res.model.data.xnames = list(df.drop(columns=["y"]).columns)
    return res

def _run_eventstudy_one(df, ycol, t0, W, controls=None, omit_k=-1, lags=10):
    D, k = _event_dummies(df["date"], t0, W, omit_k)
    X = D.copy()
    if controls:
        for c in controls:
            if c in df.columns:
                X[c] = df[c]
    res = _nw_ols(df[ycol], X, lags=lags)
    out = []
    for name in D.columns:
        kk = int(name.replace("k_", "").replace("+",""))
        beta = res.params.get(name, np.nan)
        se   = res.bse.get(name, np.nan)
        out.append({"k": kk, "beta": beta, "se": se})
    out = pd.DataFrame(out).sort_values("k")
    out["lo"] = out["beta"] - 1.96*out["se"]
    out["hi"] = out["beta"] + 1.96*out["se"]
    return out

def _run_eventstudy_pooled(long_df, t0, W, controls=None, omit_k=-1, lags=10):
    # long_df: date, series_id, value plus controls
    D, k = _event_dummies(long_df["date"], t0, W, omit_k)
    X = D.copy()
    # series FE via one-hot (drop first)
    fe = pd.get_dummies(long_df["series_id"], prefix="fe", drop_first=True)
    X = pd.concat([X.reset_index(drop=True), fe.reset_index(drop=True)], axis=1)
    if controls:
        for c in controls:
            if c in long_df.columns:
                X[c] = long_df[c].values

    # ensure no stray non-numeric columns
    X = X.copy()
    for c in X.columns:
        if X[c].dtype == bool:
            X[c] = X[c].astype(float)

    res = _nw_ols(long_df["value"], X, lags=lags)
    out = []
    for col in D.columns:
        kk = int(col.replace("k_", "").replace("+",""))
        out.append({"k": kk, "beta": res.params.get(col, np.nan), "se": res.bse.get(col, np.nan)})
    out = pd.DataFrame(out).sort_values("k")
    out["lo"] = out["beta"] - 1.96*out["se"]
    out["hi"] = out["beta"] + 1.96*out["se"]
    return out

def _jump_estimate(df, ycol, t0, W):
    # mean post (k in [0,W]) minus mean pre (k in [-W,-1])
    k = (df["date"] - t0).dt.days.astype(int)
    pre = df.loc[(k >= -W) & (k <= -1), ycol]
    post = df.loc[(k >= 0) & (k <= W), ycol]
    return float(post.mean() - pre.mean())



In [7]:
# CELL 3 — Load inputs and construct the daily analysis panel

# 1) Load daily spreads
if DAILY_SPREADS_PATH is None:
    # try a few common locations if not provided
    DAILY_SPREADS_PATH = next((p for p in [
        Path("../data/tips_treasury_implied_rf_2010.parquet"),
    ] if p.exists()), None)

if DAILY_SPREADS_PATH is None:
    raise FileNotFoundError("Set DAILY_SPREADS_PATH to your daily spreads file.")

daily_raw = _read_any(DAILY_SPREADS_PATH)
daily_raw = _ensure_date(daily_raw, "date").sort_values("date")

# 2) Convert to long spreads (date, series_id, value)
if {"series_id","value"}.issubset(daily_raw.columns):
    daily_long = daily_raw[["date","series_id","value"]].copy()
    spread_cols = sorted(daily_long["series_id"].unique())
else:
    # wide -> long
    daily_long, spread_cols = _wide_to_long_spreads(daily_raw, "date", "value")

# 3) Merge daily controls (if available)
repo = _read_any(REPO_RATES_PATH)
risk = _read_any(RISK_CONTROLS_PATH)

if repo is not None:
    repo = _ensure_date(repo, "date").sort_values("date")
if risk is not None:
    risk = _ensure_date(risk, "date").sort_values("date")

# merge controls into wide daily panel (for per-series regressions)
daily_wide = daily_raw.copy()
if repo is not None:
    daily_wide = daily_wide.merge(repo, on="date", how="left")
if risk is not None:
    daily_wide = daily_wide.merge(risk, on="date", how="left")

# derive TGCR/BGCR spreads if present
if "SOFR" in daily_wide.columns and "TGCR" in daily_wide.columns and "TGCR_minus_SOFR" not in daily_wide.columns:
    daily_wide["TGCR_minus_SOFR"] = daily_wide["TGCR"] - daily_wide["SOFR"]
if "SOFR" in daily_wide.columns and "BGCR" in daily_wide.columns and "BGCR_minus_SOFR" not in daily_wide.columns:
    daily_wide["BGCR_minus_SOFR"] = daily_wide["BGCR"] - daily_wide["SOFR"]

# also merge controls into long panel for pooled regressions
daily_long = daily_long.merge(daily_wide.drop(columns=spread_cols, errors="ignore"), on="date", how="left")

print("Daily spreads file:", DAILY_SPREADS_PATH)
print("Series count:", len(spread_cols))
print("Example series:", spread_cols[:10])
daily_long.head()


Daily spreads file: ..\data\tips_treasury_implied_rf_2010.parquet
Series count: 4
Example series: ['arb_10', 'arb_2', 'arb_20', 'arb_5']


Unnamed: 0,date,series_id,value,real_cc2,real_cc5,real_cc10,real_cc20,nom_zc2,nom_zc5,nom_zc10,...,SOFR,TGCR,BGCR,sofr_minus_tgcr,sofr_minus_bgcr,VIX,HY_OAS,BAA10Y,TGCR_minus_SOFR,BGCR_minus_SOFR
0,2010-01-04,arb_10,23.935082,-0.000515,0.00701,0.016359,0.021538,109.505229,268.43978,428.047942,...,,,,,,,,,,
1,2010-01-05,arb_10,22.439776,-0.001248,0.006286,0.015905,0.020813,101.824926,259.540905,420.521627,...,,,,,,,,,,
2,2010-01-06,arb_10,19.821813,-0.001413,0.006443,0.016328,0.021353,102.875564,264.569315,428.767485,...,,,,,,,,,,
3,2010-01-07,arb_10,21.670143,-0.00225,0.006208,0.015919,0.021503,104.805405,266.283644,427.808072,...,,,,,,,,,,
4,2010-01-08,arb_10,22.848053,-0.002142,0.005842,0.015771,0.021135,98.996805,262.434519,426.160609,...,,,,,,,,,,


In [8]:
# CELL 4 — Layer 1: Event-study regressions (TOTAL vs DIRECT) + jump estimates
# Outputs:
#  - layer1_eventstudy_coeffs.csv
#  - layer1_jump_estimates.csv

direct_controls = [c for c in DIRECT_CONTROL_CANDS if c in daily_long.columns]

coeff_rows = []
jump_rows = []

for ev_name, t0 in EVENTS:
    for W in WINDOWS:
        # pooled event study (series FE) — TOTAL
        pooled_total = _run_eventstudy_pooled(daily_long, t0, W, controls=[], omit_k=OMIT_K, lags=NW_LAGS_DAILY)
        pooled_total["event"] = ev_name
        pooled_total["t0"] = t0
        pooled_total["W"] = W
        pooled_total["spec"] = "TOTAL"
        pooled_total["series_id"] = "__pooled__"
        coeff_rows.append(pooled_total)

        # pooled — DIRECT (controls)
        pooled_direct = _run_eventstudy_pooled(daily_long, t0, W, controls=direct_controls, omit_k=OMIT_K, lags=NW_LAGS_DAILY)
        pooled_direct["event"] = ev_name
        pooled_direct["t0"] = t0
        pooled_direct["W"] = W
        pooled_direct["spec"] = "DIRECT"
        pooled_direct["series_id"] = "__pooled__"
        coeff_rows.append(pooled_direct)

        # separate regressions by series
        for sid in spread_cols:
            sub = daily_wide[["date"]].copy()
            # attach outcome from original raw file if wide; otherwise pull from long
            if sid in daily_wide.columns:
                sub["y"] = pd.to_numeric(daily_wide[sid], errors="coerce")
            else:
                sub = sub.merge(daily_long[daily_long["series_id"]==sid][["date","value"]], on="date", how="left")
                sub["y"] = pd.to_numeric(sub["value"], errors="coerce")

            # attach controls
            for c in direct_controls:
                sub[c] = daily_wide[c].values if c in daily_wide.columns else np.nan

            sub = sub.dropna(subset=["y"]).sort_values("date")

            # TOTAL
            est_tot = _run_eventstudy_one(sub.rename(columns={"y":"y"}), "y", t0, W, controls=[], omit_k=OMIT_K, lags=NW_LAGS_DAILY)
            est_tot["event"] = ev_name
            est_tot["t0"] = t0
            est_tot["W"] = W
            est_tot["spec"] = "TOTAL"
            est_tot["series_id"] = sid
            coeff_rows.append(est_tot)

            # DIRECT
            est_dir = _run_eventstudy_one(sub.rename(columns={"y":"y"}), "y", t0, W, controls=direct_controls, omit_k=OMIT_K, lags=NW_LAGS_DAILY)
            est_dir["event"] = ev_name
            est_dir["t0"] = t0
            est_dir["W"] = W
            est_dir["spec"] = "DIRECT"
            est_dir["series_id"] = sid
            coeff_rows.append(est_dir)

            # jump estimate (TOTAL only, using raw y)
            jump = _jump_estimate(sub.rename(columns={"y":"y"}), "y", t0, W)
            jump_rows.append({
                "event": ev_name,
                "t0": t0,
                "W": W,
                "series_id": sid,
                "jump_post_minus_pre": jump
            })

coeffs = pd.concat(coeff_rows, ignore_index=True)
jumps = pd.DataFrame(jump_rows)

coeff_path = OUT_DIR / "layer1_eventstudy_coeffs.csv"
jump_path  = OUT_DIR / "layer1_jump_estimates.csv"
coeffs.to_csv(coeff_path, index=False)
jumps.to_csv(jump_path, index=False)

print("Saved:", coeff_path)
print("Saved:", jump_path)
print("DIRECT controls used:", direct_controls)
coeffs.head()


AttributeError: 'numpy.ndarray' object has no attribute 'get'

In [None]:
# CELL 5 — Layer 1 plots: (i) time-series per series + event lines, (ii) event-study coefficient panels
# Saves PNGs into FIG_DIR

# (i) time-series per series (first 12 series to avoid huge output; edit if needed)
series_to_plot = spread_cols[:12]
for sid in series_to_plot:
    s = daily_wide[["date"]].copy()
    if sid in daily_wide.columns:
        s["y"] = pd.to_numeric(daily_wide[sid], errors="coerce")
    else:
        s = s.merge(daily_long[daily_long["series_id"]==sid][["date","value"]], on="date", how="left")
        s["y"] = pd.to_numeric(s["value"], errors="coerce")

    s = s.dropna(subset=["y"]).sort_values("date")

    fig, ax = plt.subplots(figsize=(12,4))
    ax.plot(s["date"], s["y"], label=sid)
    for d, _ in EVENTS:
        ax.axvline(dict(EVENTS)[d], linestyle="--", linewidth=1)
    ax.set_title(f"Daily series: {sid}")
    ax.set_xlabel("Date")
    ax.set_ylabel("Units of series (as provided)")
    ax.legend()
    fig.tight_layout()
    fig.savefig(FIG_DIR / f"ts_{re.sub('[^A-Za-z0-9_\-]+','_',sid)}.png", dpi=200)
    plt.show()

# (ii) event-study coefficient panels: per event, per W, pooled and first 4 series
plot_series = ["__pooled__"] + spread_cols[:4]

for ev_name, t0 in EVENTS:
    for W in WINDOWS:
        sub = coeffs[(coeffs["event"]==ev_name) & (coeffs["W"]==W) & (coeffs["series_id"].isin(plot_series))].copy()
        if sub.empty:
            continue

        # grid: rows=series, cols=spec (TOTAL, DIRECT)
        series_list = plot_series
        fig, axes = plt.subplots(len(series_list), 1, figsize=(12, 3.2*len(series_list)), sharex=True)

        if len(series_list)==1:
            axes = [axes]

        for i, sid in enumerate(series_list):
            ax = axes[i]
            for spec in ["TOTAL","DIRECT"]:
                s = sub[(sub["series_id"]==sid) & (sub["spec"]==spec)].sort_values("k")
                ax.plot(s["k"], s["beta"], label=spec)
                ax.fill_between(s["k"], s["lo"], s["hi"], alpha=0.15)
            ax.axvline(0, linestyle="--", linewidth=1)
            ax.axhline(0, linestyle=":", linewidth=1)
            ax.set_title(f"{ev_name} (t0={t0.date()}) | W={W} | series={sid}")
            ax.set_ylabel("beta")
            ax.legend()

        axes[-1].set_xlabel("event time k (days)")
        fig.tight_layout()
        fig.savefig(FIG_DIR / f"eventstudy_{ev_name}_W{W}.png", dpi=200)
        plt.show()

print("Saved figures to:", FIG_DIR)


In [None]:
# CELL 6 — Build weekly panel for Layer 2 mechanism regressions
# Output: layer2_weekly_panel.csv

# Weekly aggregation rule: Wednesday week-ending mean of daily series
def to_weekly_mean(long_df):
    tmp = long_df.copy()
    tmp = tmp.set_index("date")
    w = (tmp.groupby("series_id")
           .apply(lambda g: g[["value"]].resample("W-WED").mean())
           .reset_index()
           .rename(columns={"value":"wedge_weekly"}))
    return w

# Base: weekly mean of each series
base_weekly = daily_long[["date","series_id","value"]].copy()
base_weekly["value"] = pd.to_numeric(base_weekly["value"], errors="coerce")
base_weekly = base_weekly.dropna(subset=["value"])
wk = to_weekly_mean(base_weekly)

# Weekly change in spread
wk = wk.sort_values(["series_id","date"])
wk["dwedge"] = wk.groupby("series_id")["wedge_weekly"].diff()

# Relief indicator
wk["Relief"] = ((wk["date"] >= RELIEF_START) & (wk["date"] <= RELIEF_END)).astype(int)

# Bank exposure (daily -> weekly mean -> z-score)
bank = _read_any(BANK_EXPOSURE_DAILY_PATH)
bank_exposure_col = None
if bank is not None:
    bank = _ensure_date(bank, "date").sort_values("date")
    # choose best available exposure column
    for c in ["agg_exempt_share","agg_exempt_share_proxy","agg_reserves_share","agg_exempt_share_min","agg_exempt_share_proxy"]:
        if c in bank.columns:
            bank_exposure_col = c
            break
    if bank_exposure_col is None:
        # pick any numeric column other than date
        num_cols = [c for c in bank.columns if c != "date" and pd.api.types.is_numeric_dtype(bank[c])]
        bank_exposure_col = num_cols[0] if num_cols else None

if bank is not None and bank_exposure_col is not None:
    bank_w = bank.set_index("date")[[bank_exposure_col]].resample("W-WED").mean().reset_index()
    bank_w["bank_exposure_z"] = _standardize_z(bank_w[bank_exposure_col])
    wk = wk.merge(bank_w[["date","bank_exposure_z"]], on="date", how="left")
else:
    wk["bank_exposure_z"] = np.nan
    print("WARNING: bank exposure daily file missing or no usable exposure column; bank_exposure_z will be NaN.")

# Dealer utilization (primary dealer long -> weekly index -> lag1)
pd_long = _read_any(PRIMARY_DEALER_LONG_PATH)
if pd_long is None:
    wk["dealer_util_z_lag1w"] = np.nan
    print("WARNING: primary dealer file missing; dealer_util_z_lag1w will be NaN.")
else:
    pd_long = _ensure_date(pd_long, "date").sort_values("date")
    if not {"mnemonic","value"}.issubset(pd_long.columns):
        raise ValueError(f"Primary dealer file missing mnemonic/value. cols={pd_long.columns.tolist()}")

    # utilization index uses repo financing series if present (fallback to any RP_* Treasury/TIPS series)
    rp_cands = [
        "NYPD-PD_RP_T_TOT-A",
        "NYPD-PD_RP_TIPS_TOT-A",
        "NYPD-PD_RP_TOT-A",
    ]
    available = [c for c in rp_cands if c in set(pd_long["mnemonic"])]
    if not available:
        available = [m for m in pd_long["mnemonic"].unique() if m.startswith("NYPD-PD_RP_")][:3]

    pd_wide = (pd_long[pd_long["mnemonic"].isin(available)]
               .pivot_table(index="date", columns="mnemonic", values="value", aggfunc="first")
               .sort_index()
               .reset_index())

    # utilization index = mean z-score across chosen repo series
    for c in available:
        pd_wide[c] = pd.to_numeric(pd_wide[c], errors="coerce")
        pd_wide[f"z_{c}"] = _standardize_z(pd_wide[c])
    zcols = [f"z_{c}" for c in available]
    pd_wide["dealer_util_z"] = pd_wide[zcols].mean(axis=1)

    # merge and lag
    wk = wk.merge(pd_wide[["date","dealer_util_z"]], on="date", how="left")
    wk = wk.sort_values(["series_id","date"])
    wk["dealer_util_z_lag1w"] = wk.groupby("series_id")["dealer_util_z"].shift(1)

# Controls (weekly mean from daily controls if present in daily_wide)
ctrl_cols = [c for c in ["VIX","HY_OAS","BAA10Y","SOFR","spr_tgcr","spr_effr"] if c in daily_wide.columns]
if ctrl_cols:
    ctrl_w = daily_wide.set_index("date")[ctrl_cols].resample("W-WED").mean().reset_index()
    wk = wk.merge(ctrl_w, on="date", how="left")

wk_path = OUT_DIR / "layer2_weekly_panel.csv"
wk.to_csv(wk_path, index=False)
print("Saved:", wk_path)
wk.head()


In [None]:
# CELL 7 — Layer 2 mechanism regressions (weekly; by series and pooled)
# Model: dwedge = b0 + b1*Relief + b2*(Relief*bank_exposure_z) + b3*(Relief*dealer_util_z_lag1w) + controls + e
# HAC: NW_LAGS_WEEKLY
# Output: layer2_mechanism_results.csv

def run_mech(df, y="dwedge", add_controls=True, lags=4):
    X = pd.DataFrame({
        "Relief": df["Relief"].astype(float),
        "Relief_x_bank": df["Relief"].astype(float) * df["bank_exposure_z"].astype(float),
        "Relief_x_util": df["Relief"].astype(float) * df["dealer_util_z_lag1w"].astype(float),
    })
    if add_controls:
        for c in ["VIX","HY_OAS","BAA10Y","SOFR","spr_tgcr","spr_effr"]:
            if c in df.columns:
                X[c] = pd.to_numeric(df[c], errors="coerce")
    res = _nw_ols(pd.to_numeric(df[y], errors="coerce"), X, lags=lags)
    rows = []
    for nm in ["Relief","Relief_x_bank","Relief_x_util"]:
        rows.append({
            "coef": nm,
            "beta": res.params.get(nm, np.nan),
            "se": res.bse.get(nm, np.nan),
        })
    out = pd.DataFrame(rows)
    out["lo"] = out["beta"] - 1.96*out["se"]
    out["hi"] = out["beta"] + 1.96*out["se"]
    return out

results = []

# pooled across series (series FE)
wk2 = wk.dropna(subset=["dwedge"]).copy()
fe = pd.get_dummies(wk2["series_id"], prefix="fe", drop_first=True)
X = pd.DataFrame({
    "Relief": wk2["Relief"].astype(float),
    "Relief_x_bank": wk2["Relief"].astype(float) * wk2["bank_exposure_z"].astype(float),
    "Relief_x_util": wk2["Relief"].astype(float) * wk2["dealer_util_z_lag1w"].astype(float),
})
for c in ["VIX","HY_OAS","BAA10Y","SOFR","spr_tgcr","spr_effr"]:
    if c in wk2.columns:
        X[c] = pd.to_numeric(wk2[c], errors="coerce")
X = pd.concat([X.reset_index(drop=True), fe.reset_index(drop=True)], axis=1)
pooled = _nw_ols(pd.to_numeric(wk2["dwedge"], errors="coerce"), X, lags=NW_LAGS_WEEKLY)

for nm in ["Relief","Relief_x_bank","Relief_x_util"]:
    results.append({
        "series_id": "__pooled__",
        "coef": nm,
        "beta": pooled.params.get(nm, np.nan),
        "se": pooled.bse.get(nm, np.nan),
    })

# by series
for sid in sorted(wk["series_id"].unique()):
    sub = wk[wk["series_id"]==sid].dropna(subset=["dwedge"]).copy()
    if len(sub) < 25:
        continue
    out = run_mech(sub, y="dwedge", add_controls=True, lags=NW_LAGS_WEEKLY)
    out["series_id"] = sid
    results.append(out)

mech = pd.concat([r if isinstance(r, pd.DataFrame) else pd.DataFrame([r]) for r in results], ignore_index=True)
mech["lo"] = mech["beta"] - 1.96*mech["se"]
mech["hi"] = mech["beta"] + 1.96*mech["se"]

mech_path = OUT_DIR / "layer2_mechanism_results.csv"
mech.to_csv(mech_path, index=False)

print("Saved:", mech_path)
mech.head()


In [None]:
# CELL 8 — Layer 2 plots: coefficient plot for pooled + top series
import matplotlib.pyplot as plt
import numpy as np

focus = mech[mech["coef"].isin(["Relief","Relief_x_bank","Relief_x_util"])].copy()

# Select pooled + first few series (by abs Relief effect)
pool = focus[focus["series_id"]=="__pooled__"].copy()
rest = focus[focus["series_id"]!="__pooled__"].copy()

rank = (rest[rest["coef"]=="Relief"]
        .assign(absb=lambda d: d["beta"].abs())
        .sort_values("absb", ascending=False)
        .head(8)["series_id"].tolist())

plot_df = pd.concat([pool, rest[rest["series_id"].isin(rank)]], ignore_index=True)

plot_df["label"] = plot_df["series_id"] + " | " + plot_df["coef"]

plot_df = plot_df.sort_values(["coef","series_id"]).reset_index(drop=True)
y = np.arange(len(plot_df))

fig, ax = plt.subplots(figsize=(12, max(5, 0.28*len(plot_df))))
ax.errorbar(
    plot_df["beta"], y,
    xerr=[plot_df["beta"]-plot_df["lo"], plot_df["hi"]-plot_df["beta"]],
    fmt="o"
)
ax.axvline(0, linestyle="--", linewidth=1)
ax.set_yticks(y)
ax.set_yticklabels(plot_df["label"])
ax.set_title("Weekly mechanism coefficients (95% CI): pooled + top series")
ax.set_xlabel("Coefficient")
fig.tight_layout()
fig.savefig(FIG_DIR / "layer2_mechanism_coeffs.png", dpi=200)
plt.show()

print("Saved:", FIG_DIR / "layer2_mechanism_coeffs.png")
