# SLR summary pipeline

A reproducible pipeline, designed to be executed top to bottom with Run All, for event-study analysis of the `daily_long` dataset.

In [1]:
import os
from pathlib import Path
import sys
import json
import warnings
sys.path.append(os.path.abspath("../src"))
from slr_bucket.config import PipelineConfig

# CONFIG
# Every tunable setting for the run lives in this one mapping and is handed
# to PipelineConfig by keyword, so the printed JSON below mirrors it exactly.
_config_kwargs = {
    'event_dates': ['2020-04-01', '2021-03-19', '2021-03-31'],
    'windows': [3, 5, 10],
    'event_bins': [
        (-60, -41),
        (-40, -21),
        (-20, -1),
        (0, 0),
        (1, 20),
        (21, 40),
        (41, 60),
    ],
    'dependent_series': None,
    'tenor_subset': None,
    'total_controls': [],
    'direct_controls': ['sofr', 'tgcr', 'bgcr'],
    'hac_lags': 5,
    'bootstrap_reps': 200,
    'bootstrap_block_size': 5,
    'random_seed': 42,
    'output_root': 'outputs/summary_pipeline',
    'cache_root': 'outputs/cache',
}
CONFIG = PipelineConfig(**_config_kwargs)

# The repository root is taken to be the parent of the current working
# directory (i.e. the notebook is assumed to run one level below the root).
REPO_ROOT = Path.cwd().resolve().parent
SRC_DIR = REPO_ROOT / 'src'
DATA_DIR = REPO_ROOT / 'data'

# Put the absolute src directory at the front of sys.path unless already listed.
_src_entry = str(SRC_DIR)
if _src_entry not in sys.path:
    sys.path.insert(0, _src_entry)

# Echo the effective configuration; default=str stringifies anything JSON cannot encode.
print(json.dumps(vars(CONFIG), indent=2, default=str))

{
  "event_dates": [
    "2020-04-01",
    "2021-03-19",
    "2021-03-31"
  ],
  "windows": [
    3,
    5,
    10
  ],
  "event_bins": [
    [
      -60,
      -41
    ],
    [
      -40,
      -21
    ],
    [
      -20,
      -1
    ],
    [
      0,
      0
    ],
    [
      1,
      20
    ],
    [
      21,
      40
    ],
    [
      41,
      60
    ]
  ],
  "dependent_series": null,
  "tenor_subset": null,
  "total_controls": [],
  "direct_controls": [
    "sofr",
    "tgcr",
    "bgcr"
  ],
  "hac_lags": 5,
  "bootstrap_reps": 200,
  "bootstrap_block_size": 5,
  "random_seed": 42,
  "output_root": "outputs/summary_pipeline",
  "cache_root": "outputs/cache"
}


## Data inputs and construction of daily_long

This section discovers local files, builds a data catalog, validates `daily_long`, and persists deterministic outputs.

In [2]:
import logging
import pandas as pd
from slr_bucket.io import build_data_catalog, find_daily_long, discover_funding_series
from slr_bucket.pipeline import prepare_run_dirs, refresh_latest, setup_logging, write_catalog_outputs, write_run_readme
from slr_bucket.validation import validate_daily_long

# run_dirs is indexed by name throughout the notebook
# ('logs', 'data', 'figures', 'tables', 'run').
run_dirs = prepare_run_dirs(REPO_ROOT, CONFIG)
log_path = run_dirs['logs'] / 'pipeline.log'
setup_logging(log_path)
logger = logging.getLogger('summary_pipeline')

# Build the catalog of files under DATA_DIR and write it next to the run's data outputs.
catalog = build_data_catalog(DATA_DIR)
write_catalog_outputs(catalog, run_dirs['data'])
n_catalog_rows = len(catalog)
logger.info('catalog rows=%s', n_catalog_rows)

# Record which funding-series files were discovered (logged only; not used further in this cell).
funding_mapping = discover_funding_series(DATA_DIR)
logger.info('funding mapping: %s', funding_mapping)

# Locate daily_long under DATA_DIR and pass it through validation before any analysis.
daily_long_raw = find_daily_long(DATA_DIR)
daily_long = validate_daily_long(daily_long_raw)

# Persist the validated frame under a file name keyed by the configuration hash.
cache_dir = REPO_ROOT / CONFIG.cache_root
cache_dir.mkdir(parents=True, exist_ok=True)
config_hash = CONFIG.to_hash()
cache_path = cache_dir / f'daily_long_{config_hash}.parquet'
daily_long.to_parquet(cache_path, index=False)
n_daily_rows = len(daily_long)
logger.info('daily_long rows=%s saved=%s', n_daily_rows, cache_path)

2026-02-26 11:12:21,954 INFO summary_pipeline - catalog rows=25
2026-02-26 11:12:22,052 INFO summary_pipeline - funding mapping: {'ofr': 'C:\\Users\\Owner\\Box\\Winter26\\slr_bucket\\data\\event_inputs\\primary_dealer_stats_ofr_stfm_nypd_long.csv', 'repo': 'C:\\Users\\Owner\\Box\\Winter26\\slr_bucket\\data\\event_inputs\\repo_rates_combined.csv'}
2026-02-26 11:12:22,130 INFO summary_pipeline - daily_long rows=14 saved=C:\Users\Owner\Box\Winter26\slr_bucket\outputs\cache\daily_long_f562fdf488e4.parquet


In [3]:
# Reshape the long panel to wide form: one row per (date, tenor) and one
# column per series. Repeated (date, tenor, series) keys are resolved with
# pandas' 'last' aggregation.
pivot = daily_long.pivot_table(index=['date', 'tenor'], columns='series', values='value', aggfunc='last').reset_index()
if CONFIG.tenor_subset:
    pivot = pivot[pivot['tenor'].isin(CONFIG.tenor_subset)]

# Upper bound on how many dependent series are picked automatically when
# CONFIG.dependent_series is not set.
MAX_AUTO_DEP_SERIES = 5

series_candidates = [c for c in pivot.columns if c not in {'date', 'tenor'}]
if CONFIG.dependent_series:
    dep_series = [s for s in CONFIG.dependent_series if s in series_candidates]
    # Surface configured series that are absent from the data instead of
    # dropping them silently.
    missing_series = [s for s in CONFIG.dependent_series if s not in series_candidates]
    if missing_series:
        logger.warning('configured dependent series not found in daily_long: %s', missing_series)
else:
    # Automatic selection: prefer series that are not used as controls; if
    # every available series is a control, fall back to all candidates.
    control_set = set(CONFIG.total_controls or []) | set(CONFIG.direct_controls or [])
    auto_candidates = [c for c in series_candidates if c not in control_set]
    dep_series = (auto_candidates or series_candidates)[:MAX_AUTO_DEP_SERIES]

if not dep_series:
    raise ValueError('No dependent series selected. Check CONFIG.dependent_series or daily_long series availability.')

logger.info('dependent series: %s', dep_series)
logger.info('tenors: %s', sorted(pivot['tenor'].dropna().astype(str).unique().tolist()))

2026-02-26 11:12:22,149 INFO summary_pipeline - dependent series: ['wedge']
2026-02-26 11:12:22,152 INFO summary_pipeline - tenors: ['2y']


## Econometric designs

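- Windowed jumps: pre/post mean-shift regression with HAC standard errors, estimated with no controls ("total") and with the direct controls ("direct"); a block-bootstrap standard error is additionally reported for the total specification.
- Event-study bins: total-effect (no controls) and direct-effect (with `direct_controls`) specifications.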

In [4]:
import numpy as np
from slr_bucket.econometrics.event_study import jump_estimator, block_bootstrap_jump, event_study_regression
from slr_bucket.plotting.plots import plot_series_with_events, plot_event_paths

def _ci_bounds(point, std_err):
    """Return (low, high) = point -/+ 1.96 * std_err, or (nan, nan) when either input is non-finite."""
    if np.isfinite(point) and np.isfinite(std_err):
        return point - 1.96 * std_err, point + 1.96 * std_err
    return np.nan, np.nan


def _jump_record(event, tenor, series, window, spec, point, std_err, boot_se, n_obs):
    """Build one jump-table row; the key order determines the column order of the table."""
    ci_low, ci_high = _ci_bounds(point, std_err)
    return {
        'event_date': event,
        'tenor': tenor,
        'series': series,
        'window': window,
        'spec': spec,
        'estimate': point,
        'se': std_err,
        'bootstrap_se': boot_se,
        'ci_low': ci_low,
        'ci_high': ci_high,
        'n': n_obs,
    }


jump_rows = []
bin_rows = []
figures_dir = run_dirs['figures']
tables_dir = run_dirs['tables']

for tenor, tdf in pivot.groupby('tenor'):
    tdf = tdf.sort_values('date').copy()
    # Only series that actually exist as columns for this tenor are analysed.
    available = [s for s in dep_series if s in tdf.columns]
    for y in available:
        # Controls are limited to columns that are present and never include the outcome itself.
        direct_controls = [c for c in (CONFIG.direct_controls or []) if (c in tdf.columns and c != y)]
        # dict.fromkeys keeps first-seen order while dropping repeated column names.
        sub_cols = list(dict.fromkeys(['date', y, *direct_controls]))
        sub = tdf[sub_cols].dropna(subset=[y])
        if sub.empty:
            warnings.warn(f'No data for {tenor}-{y}')
            continue

        plot_series_with_events(
            sub.rename(columns={y: 'dep'}),
            'dep',
            CONFIG.event_dates,
            f'{tenor} {y}',
            figures_dir / f'series_{tenor}_{y}.png',
        )

        for event in CONFIG.event_dates:
            for w in CONFIG.windows:
                # Total specification: no controls, HAC SE plus a block-bootstrap SE.
                est, se, n = jump_estimator(sub, y, event, w, controls=None, hac_lags=CONFIG.hac_lags)
                bse = block_bootstrap_jump(
                    sub, y, event, w,
                    controls=None,
                    reps=CONFIG.bootstrap_reps,
                    block_size=CONFIG.bootstrap_block_size,
                    seed=CONFIG.random_seed,
                )
                jump_rows.append(_jump_record(event, tenor, y, w, 'total', est, se, bse, n))

                # Direct specification: same estimator with controls; no bootstrap SE is computed.
                est_d, se_d, n_d = jump_estimator(sub, y, event, w, controls=direct_controls, hac_lags=CONFIG.hac_lags)
                jump_rows.append(_jump_record(event, tenor, y, w, 'direct', est_d, se_d, np.nan, n_d))

            spec_controls = {'total': None, 'direct': direct_controls}
            for spec_name, controls in spec_controls.items():
                bins_df = event_study_regression(sub, y, event, CONFIG.event_bins, controls=controls, hac_lags=CONFIG.hac_lags)
                # Tag the regression output so rows stay identifiable after concatenation.
                bins_df = bins_df.assign(event_date=event, tenor=tenor, series=y, spec=spec_name)
                bin_rows.append(bins_df)
                plot_event_paths(
                    bins_df,
                    f'{tenor} {y} {event} {spec_name}',
                    figures_dir / f'event_path_{tenor}_{y}_{event}_{spec_name}.png',
                )

jump_table = pd.DataFrame(jump_rows)
bin_table = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()

jump_table.to_csv(tables_dir / 'jump_estimates.csv', index=False)
if not bin_table.empty:
    bin_table.to_csv(tables_dir / 'event_study_bins.csv', index=False)

jump_table.head()

  ax.set_xticklabels(df["term"], rotation=45, ha="right")
  ax.set_xticklabels(df["term"], rotation=45, ha="right")
  bin_table = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()


Unnamed: 0,event_date,tenor,series,window,spec,estimate,se,bootstrap_se,ci_low,ci_high,n
0,2020-04-01,2y,wedge,3,total,,,,,,4
1,2020-04-01,2y,wedge,3,direct,,,,,,4
2,2020-04-01,2y,wedge,5,total,,,,,,5
3,2020-04-01,2y,wedge,5,direct,,,,,,5
4,2020-04-01,2y,wedge,10,total,,,,,,7


In [5]:
import numpy as np
from slr_bucket.econometrics.event_study import jump_estimator, block_bootstrap_jump, event_study_regression
from slr_bucket.plotting.plots import plot_series_with_events, plot_event_paths

# NOTE(review): this cell repeats the preceding estimation cell and rewrites
# the same CSV and figure files; consider deleting it once confirmed redundant.
jump_rows = []
bin_rows = []
for tenor, tdf in pivot.groupby('tenor'):
    tdf = tdf.sort_values('date').copy()
    for y in dep_series:
        if y not in tdf.columns:
            continue
        # Build the working frame from THIS tenor's data. Previously `sub` was
        # never assigned in this cell, so the dropna below silently reused a
        # stale `sub` left in the kernel by an earlier cell (NameError on a
        # fresh kernel, wrong data with several tenors/series).
        direct_controls = [c for c in (CONFIG.direct_controls or []) if (c in tdf.columns and c != y)]
        sub_cols = list(dict.fromkeys(['date', y, *direct_controls]))  # ordered, no duplicates
        sub = tdf[sub_cols].dropna(subset=[y])
        if sub.empty:
            warnings.warn(f'No data for {tenor}-{y}')
            continue

        plot_series_with_events(sub.rename(columns={y: 'dep'}), 'dep', CONFIG.event_dates, f'{tenor} {y}', run_dirs['figures'] / f'series_{tenor}_{y}.png')

        for event in CONFIG.event_dates:
            for w in CONFIG.windows:
                # Total spec: no controls; also gets a block-bootstrap SE.
                est, se, n = jump_estimator(sub, y, event, w, controls=None, hac_lags=CONFIG.hac_lags)
                bse = block_bootstrap_jump(sub, y, event, w, controls=None, reps=CONFIG.bootstrap_reps, block_size=CONFIG.bootstrap_block_size, seed=CONFIG.random_seed)
                # Direct spec: only controls that exist in `sub` are passed.
                est_d, se_d, n_d = jump_estimator(sub, y, event, w, controls=direct_controls, hac_lags=CONFIG.hac_lags)

                for spec, point, std_err, boot_se, n_obs in [
                    ('total', est, se, bse, n),
                    ('direct', est_d, se_d, np.nan, n_d),
                ]:
                    # The +/-1.96*SE interval is reported only when both inputs are finite.
                    has_ci = np.isfinite(point) and np.isfinite(std_err)
                    jump_rows.append({
                        'event_date': event,
                        'tenor': tenor,
                        'series': y,
                        'window': w,
                        'spec': spec,
                        'estimate': point,
                        'se': std_err,
                        'bootstrap_se': boot_se,
                        'ci_low': point - 1.96 * std_err if has_ci else np.nan,
                        'ci_high': point + 1.96 * std_err if has_ci else np.nan,
                        'n': n_obs,
                    })

            for spec_name, controls in [('total', None), ('direct', direct_controls)]:
                bins_df = event_study_regression(sub, y, event, CONFIG.event_bins, controls=controls, hac_lags=CONFIG.hac_lags)
                bins_df['event_date'] = event
                bins_df['tenor'] = tenor
                bins_df['series'] = y
                bins_df['spec'] = spec_name
                bin_rows.append(bins_df)
                plot_event_paths(bins_df, f'{tenor} {y} {event} {spec_name}', run_dirs['figures'] / f'event_path_{tenor}_{y}_{event}_{spec_name}.png')

jump_table = pd.DataFrame(jump_rows)
bin_table = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()

jump_table.to_csv(run_dirs['tables'] / 'jump_estimates.csv', index=False)
if not bin_table.empty:
    bin_table.to_csv(run_dirs['tables'] / 'event_study_bins.csv', index=False)

jump_table.head()

  ax.set_xticklabels(df["term"], rotation=45, ha="right")
  ax.set_xticklabels(df["term"], rotation=45, ha="right")
  bin_table = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()


Unnamed: 0,event_date,tenor,series,window,spec,estimate,se,bootstrap_se,ci_low,ci_high,n
0,2020-04-01,2y,wedge,3,total,,,,,,4
1,2020-04-01,2y,wedge,3,direct,,,,,,4
2,2020-04-01,2y,wedge,5,total,,,,,,5
3,2020-04-01,2y,wedge,5,direct,,,,,,5
4,2020-04-01,2y,wedge,10,total,,,,,,7


In [6]:
import numpy as np
from slr_bucket.econometrics.event_study import jump_estimator, block_bootstrap_jump, event_study_regression
from slr_bucket.plotting.plots import plot_series_with_events, plot_event_paths

# NOTE(review): this cell is another copy of the estimation loop and
# overwrites the same CSV and figure files; consider deleting it once
# confirmed redundant.
jump_rows = []
bin_rows = []
for tenor, tdf in pivot.groupby('tenor'):
    tdf = tdf.sort_values('date').copy()
    for y in dep_series:
        if y not in tdf.columns:
            continue
        # `sub` must be rebuilt for every (tenor, series) pair. The original
        # cell had this construction commented out and therefore operated on
        # whatever `sub` happened to be left in the kernel namespace.
        direct_controls = [c for c in (CONFIG.direct_controls or []) if (c in tdf.columns and c != y)]
        sub_cols = list(dict.fromkeys(['date', y, *direct_controls]))  # ordered, no duplicates
        sub = tdf[sub_cols].dropna(subset=[y])
        if sub.empty:
            warnings.warn(f'No data for {tenor}-{y}')
            continue

        plot_series_with_events(sub.rename(columns={y: 'dep'}), 'dep', CONFIG.event_dates, f'{tenor} {y}', run_dirs['figures'] / f'series_{tenor}_{y}.png')

        for event in CONFIG.event_dates:
            for w in CONFIG.windows:
                # Total spec: no controls; also gets a block-bootstrap SE.
                est, se, n = jump_estimator(sub, y, event, w, controls=None, hac_lags=CONFIG.hac_lags)
                bse = block_bootstrap_jump(sub, y, event, w, controls=None, reps=CONFIG.bootstrap_reps, block_size=CONFIG.bootstrap_block_size, seed=CONFIG.random_seed)
                # Direct spec: only controls that exist in `sub` are passed.
                est_d, se_d, n_d = jump_estimator(sub, y, event, w, controls=direct_controls, hac_lags=CONFIG.hac_lags)

                for spec, point, std_err, boot_se, n_obs in [
                    ('total', est, se, bse, n),
                    ('direct', est_d, se_d, np.nan, n_d),
                ]:
                    # The +/-1.96*SE interval is reported only when both inputs are finite.
                    has_ci = np.isfinite(point) and np.isfinite(std_err)
                    jump_rows.append({
                        'event_date': event,
                        'tenor': tenor,
                        'series': y,
                        'window': w,
                        'spec': spec,
                        'estimate': point,
                        'se': std_err,
                        'bootstrap_se': boot_se,
                        'ci_low': point - 1.96 * std_err if has_ci else np.nan,
                        'ci_high': point + 1.96 * std_err if has_ci else np.nan,
                        'n': n_obs,
                    })

            for spec_name, controls in [('total', None), ('direct', direct_controls)]:
                bins_df = event_study_regression(sub, y, event, CONFIG.event_bins, controls=controls, hac_lags=CONFIG.hac_lags)
                bins_df['event_date'] = event
                bins_df['tenor'] = tenor
                bins_df['series'] = y
                bins_df['spec'] = spec_name
                bin_rows.append(bins_df)
                plot_event_paths(bins_df, f'{tenor} {y} {event} {spec_name}', run_dirs['figures'] / f'event_path_{tenor}_{y}_{event}_{spec_name}.png')

jump_table = pd.DataFrame(jump_rows)
bin_table = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()

jump_table.to_csv(run_dirs['tables'] / 'jump_estimates.csv', index=False)
if not bin_table.empty:
    bin_table.to_csv(run_dirs['tables'] / 'event_study_bins.csv', index=False)

jump_table.head()

  ax.set_xticklabels(df["term"], rotation=45, ha="right")
  ax.set_xticklabels(df["term"], rotation=45, ha="right")
  bin_table = pd.concat(bin_rows, ignore_index=True) if bin_rows else pd.DataFrame()


Unnamed: 0,event_date,tenor,series,window,spec,estimate,se,bootstrap_se,ci_low,ci_high,n
0,2020-04-01,2y,wedge,3,total,,,,,,4
1,2020-04-01,2y,wedge,3,direct,,,,,,4
2,2020-04-01,2y,wedge,5,total,,,,,,5
3,2020-04-01,2y,wedge,5,direct,,,,,,5
4,2020-04-01,2y,wedge,10,total,,,,,,7


## Reproducibility + how to rerun

- Run all cells in Jupyter, or execute `python scripts/run_notebook.py`.
- Outputs are timestamped + config-hashed, and `latest/` is refreshed each run.

In [7]:
# Headline counts of the input panel, recorded in the run README.
n_rows = len(daily_long)
n_tenors = daily_long['tenor'].nunique()
n_series = daily_long['series'].nunique()
notes = f"Processed {n_rows} daily_long rows across {n_tenors} tenors and {n_series} series."

run_dir = run_dirs['run']
write_run_readme(run_dir, CONFIG, notes)
print('Run dir:', run_dir)

# The expected 'latest' location is printed first, then refreshed from this run.
latest_target = REPO_ROOT / CONFIG.output_root / 'latest'
print('Latest:', latest_target)
latest_dir = refresh_latest(REPO_ROOT, CONFIG, run_dir)
print('Latest refreshed:', latest_dir)


Run dir: C:\Users\Owner\Box\Winter26\slr_bucket\outputs\summary_pipeline\20260226_171216_f562fdf488e4
Latest: C:\Users\Owner\Box\Winter26\slr_bucket\outputs\summary_pipeline\latest
Latest refreshed: C:\Users\Owner\Box\Winter26\slr_bucket\outputs\summary_pipeline\latest


## Sanity checks

Basic diagnostics for missingness and sample support.

In [8]:
# Per (tenor, series) support: non-null observation count, mean and standard deviation of `value`.
value_stats = daily_long.groupby(['tenor', 'series'])['value'].agg(['count', 'mean', 'std'])
# Best-supported pairs first.
sanity = value_stats.reset_index().sort_values('count', ascending=False)
sanity.head(20)

Unnamed: 0,tenor,series,count,mean,std
0,2y,sofr,7,0.01,0.0
1,2y,wedge,7,0.14,0.039158
