# WRDS Flagship Momentum Walk-Forward

This notebook is a quick reader for WRDS walk-forward artefacts under `artifacts/wrds_flagship/<RUN_ID>`.

- Set `WRDS_DATA_ROOT` before running the pipeline (`make wfv-wrds && make report-wrds`).
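- Override the defaults via env: `WRDS_RUN_ID=<RUN_ID>` selects a specific run (it must contain `metrics.json`), and `WRDS_ARTIFACT_ROOT=<path>` changes the directory that is searched. Without `WRDS_RUN_ID`, the latest (last by sorted name) complete directory (one containing `metrics.json`) is used; if no directory is complete, the latest directory is used and the cells below report which files are missing.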
- The spec matches the tightened risk limits (1.25x gross, 20% DD halt, 3% ADV turnover target, 8 positions/sector).
- Plots include equity + drawdown, rolling 12M Sharpe, and per-fold test Sharpe/MaxDD.


In [None]:
from pathlib import Path
import json, os
import pandas as pd
import matplotlib.pyplot as plt

# Root folder holding one sub-directory per run; overridable through the environment.
ARTIFACT_ROOT = Path(os.getenv("WRDS_ARTIFACT_ROOT", "artifacts/wrds_flagship"))
# Optional explicit run selection; when unset (or empty) a run is picked automatically.
RUN_ID = os.getenv("WRDS_RUN_ID")

# Every sub-directory of the root in sorted (lexicographic) order, and the
# subset of those that already contain a metrics.json.
candidate_dirs = sorted(filter(Path.is_dir, ARTIFACT_ROOT.glob("*")))
complete_dirs = [run for run in candidate_dirs if (run / "metrics.json").exists()]

if RUN_ID:
    # An explicitly requested run must be complete; fail loudly otherwise.
    RUN_DIR = ARTIFACT_ROOT / RUN_ID
    if not (RUN_DIR / "metrics.json").exists():
        raise FileNotFoundError(f"metrics.json missing under {RUN_DIR}")
else:
    # Preference order: last complete run, then last run of any kind,
    # then a placeholder path so the remaining cells can report what is missing.
    fallback_dirs = complete_dirs or candidate_dirs
    RUN_DIR = fallback_dirs[-1] if fallback_dirs else ARTIFACT_ROOT / "RUN_ID_HERE"

print(f"Using run dir: {RUN_DIR}")


In [None]:
# Headline metrics: load metrics.json for the selected run and show a
# small labelled summary. Keys absent from the file show up as None.
metrics_path = RUN_DIR / "metrics.json"
if metrics_path.exists():
    metrics = json.loads(metrics_path.read_text())
    headline = {
        "Sharpe_HAC": metrics.get("sharpe_ratio"),
        "Calmar": metrics.get("calmar_ratio"),
        "MaxDD": metrics.get("max_drawdown"),
        "Turnover": metrics.get("total_turnover"),
        "RealityCheck_p": metrics.get("reality_check_p_value"),
    }
    display(pd.Series(headline, name=RUN_DIR.name))
else:
    # Reset rather than leave `metrics` untouched: if this cell is re-run in a
    # live kernel after RUN_DIR changed, a later cell that probes for `metrics`
    # would otherwise report numbers belonging to the previously loaded run.
    metrics = {}
    print(f"Missing metrics.json under {RUN_DIR}")


In [None]:
# Equity curve: read equity_curve.csv (columns `timestamp` and `equity`),
# derive simple period returns and the drawdown series, and print the worst drawdown.
equity_path = RUN_DIR / "equity_curve.csv"
if equity_path.exists():
    equity = pd.read_csv(equity_path, parse_dates=["timestamp"]).set_index("timestamp")
    # The first row has no predecessor, so its return is set to 0.
    equity["returns"] = equity["equity"].pct_change().fillna(0.0)
    # Drawdown as a positive fraction below the running peak.
    dd = 1 - equity["equity"] / equity["equity"].cummax()
    print(f"Max drawdown: {dd.max():.2%}")
else:
    # Reset to empty objects so that re-running this cell in a live kernel
    # cannot leave a previous run's curve behind for the plotting cell, which
    # would then draw stale data under the current run's name.
    equity = pd.DataFrame()
    dd = pd.Series(dtype=float)
    print(f"Missing equity_curve.csv under {RUN_DIR}")


In [None]:
# Equity and drawdown panels plus a rolling one-year Sharpe ratio, drawn from
# the `equity` frame (and `dd` series) created by the equity-loading cell.

# Window length for the rolling statistics and the annualisation factor.
# Assumes one row per trading day - TODO confirm against the pipeline output.
TRADING_DAYS = 252

if 'equity' in locals() and not equity.empty:
    fig, axes = plt.subplots(2, 1, figsize=(10, 6), sharex=True)
    equity['equity'].plot(ax=axes[0], title=f"Equity – {RUN_DIR.name}")
    axes[0].set_ylabel('Equity')
    axes[0].grid(alpha=0.3)
    if 'dd' in locals():
        dd.plot(ax=axes[1], color='tomato', title='Drawdown')
        axes[1].set_ylabel('Drawdown')
        axes[1].grid(alpha=0.3)
    fig.tight_layout()
    plt.show()

    rolling_returns = equity['returns'].rolling(window=TRADING_DAYS)
    rolling_std = rolling_returns.std()
    # Windows with zero volatility would divide by zero and plot as +/-inf;
    # mask them to NaN so they are simply left out of the line.
    # The sqrt(TRADING_DAYS) factor turns the per-period mean/std ratio into
    # the conventional annualised Sharpe that the "12M" title implies.
    rolling_sharpe = (
        rolling_returns.mean() / rolling_std.where(rolling_std > 0)
    ) * TRADING_DAYS ** 0.5
    if not rolling_sharpe.dropna().empty:
        fig2, ax2 = plt.subplots(figsize=(10, 3))
        rolling_sharpe.plot(ax=ax2, title='Rolling 12M Sharpe (simple)')
        ax2.set_ylabel('Annualised Sharpe')
        ax2.axhline(0, color='k', lw=0.5)
        ax2.grid(alpha=0.3)
        fig2.tight_layout()
        plt.show()
else:
    print('Equity not loaded.')


In [None]:
# Preview the first rows of factor_exposure.csv when the selected run has one.
factor_path = RUN_DIR / "factor_exposure.csv"
if not factor_path.exists():
    print(f"Missing factor_exposure.csv under {RUN_DIR}")
else:
    factors = pd.read_csv(factor_path)
    display(factors.head())


In [None]:
# SPA summary: print the p-value stored in spa.json and, when per-candidate
# statistics are present, show the top rows of that table.
spa_path = RUN_DIR / "spa.json"
if spa_path.exists():
    spa = json.loads(spa_path.read_text())
    print("SPA p-value:", spa.get("p_value"))
    candidate_stats = spa.get("candidate_stats")
    if candidate_stats:
        candidates = pd.DataFrame(candidate_stats)
        # Rank by t-statistic when that column exists; otherwise show the
        # records in file order instead of failing with a KeyError.
        if "t_stat" in candidates.columns:
            candidates = candidates.sort_values("t_stat", ascending=False)
        display(candidates.head())
else:
    print(f"Missing spa.json under {RUN_DIR}")


In [None]:
# Turnover summary, taken from the `metrics` dict when the headline-metrics
# cell has loaded one; otherwise there is nothing to report.
turnover_source = metrics if 'metrics' in locals() and isinstance(metrics, dict) else {}
turnover = turnover_source.get("turnover_per_day")
# Test for presence rather than truthiness so a genuine zero turnover is still
# reported, and skip NaN so the line never reads "$nan".
if turnover is not None and pd.notna(turnover):
    traded = turnover_source.get("traded_days")
    # Only mention traded days when the figure is actually available.
    traded_note = f" across {traded} traded days" if traded is not None else ""
    print(f"Average daily turnover: ${turnover:,.0f}{traded_note}")
else:
    print("Average daily turnover not available.")


In [None]:
# Per-fold results: tabulate each fold's test window and test metrics from
# folds.json, then chart test Sharpe and max drawdown per fold.
folds_path = RUN_DIR / 'folds.json'
if folds_path.exists():
    folds = json.loads(folds_path.read_text())
    rows = []
    for i, fold in enumerate(folds):
        # `test_metrics` may be absent or an explicit null; treat both as empty.
        tm = fold.get('test_metrics') or {}
        rows.append({
            'fold': i,
            'test_start': fold.get('test_start'),
            'test_end': fold.get('test_end'),
            'sharpe': tm.get('sharpe_ratio'),
            'max_dd': tm.get('max_drawdown'),
            'cagr': tm.get('cagr'),
        })
    fold_columns = ['fold', 'test_start', 'test_end', 'sharpe', 'max_dd', 'cagr']
    metric_columns = ['sharpe', 'max_dd', 'cagr']
    # Passing `columns` keeps the frame's schema stable even when there are no folds.
    fold_df = pd.DataFrame(rows, columns=fold_columns)
    if not fold_df.empty:
        # Missing metrics arrive as None (object dtype), which breaks both the
        # `* 100` scaling and matplotlib's bar(); coerce them to float/NaN.
        fold_df[metric_columns] = fold_df[metric_columns].apply(pd.to_numeric, errors='coerce')
    display(fold_df.head())
    # Draw the charts only when at least one fold has a value to show.
    if fold_df[['sharpe', 'max_dd']].notna().any().any():
        fig, axes = plt.subplots(1, 2, figsize=(12, 4))
        axes[0].bar(fold_df['fold'], fold_df['sharpe'], color='#4e79a7')
        axes[0].axhline(0, color='k', lw=0.5)
        axes[0].set_title('Per-fold test Sharpe')
        axes[1].bar(fold_df['fold'], fold_df['max_dd'] * 100, color='#f28e2b')
        axes[1].set_title('Per-fold MaxDD (%)')
        axes[1].set_ylabel('%')
        axes[1].axhline(0, color='k', lw=0.5)
        fig.tight_layout()
        plt.show()
else:
    print(f'Missing folds.json under {RUN_DIR}')
