In [2]:
# level38_vrp_proxy.py
# Python-only, free data sources (yfinance). No paid data.
# Outputs: CSV (timeseries), JSON (metrics).
# Usage: python level38_vrp_proxy.py

import numpy as np
import pandas as pd
import yfinance as yf
from dataclasses import dataclass
from typing import Dict, Tuple, Optional

# ----------------------------- Config -----------------------------
@dataclass
class Config:
    """Run parameters for the VRP proxy strategy (data, signal, costs, scaling)."""

    # --- data window and instruments ---
    start: str = "2005-01-01"
    end: Optional[str] = None
    equity: str = "SPY"
    # Cash-like ETF proxy; "BIL" is a possible alternative.
    cash: str = "SHY"
    vix: str = "^VIX"
    # True  -> read the "Close" column (already adjusted by the downloader);
    # False -> read the "Adj Close" column.
    auto_adjust: bool = True

    # --- signal construction ---
    # Realized-variance lookback in trading days (about one month).
    rv_window: int = 21
    # Rolling percentile horizon in trading days (about three years).
    pct_lookback: int = 3 * 252
    # Resample rule for rebalancing: weekly bins ending on Friday.
    rebalance: str = "W-FRI"

    # --- costs and risk scaling ---
    # Cost per unit of |Δweight| on a rebalance date. Despite the name this is
    # a decimal fraction: 0.0002 == 2 bps.
    tc_bps: float = 0.0002
    # Annualized volatility target (12%) used by the EWMA scaling step.
    target_vol: float = 0.12
    # EWMA decay factor (lambda) for the realized-vol estimate.
    lam: float = 0.94
    # Switch to the Garman–Klass realized variance (requires OHLC data).
    use_gk: bool = False


# Module-wide default configuration instance.
CFG = Config()

# ----------------------------- Utils -----------------------------
def load_adjusted_close(tickers, start, end=None, auto_adjust=True) -> pd.DataFrame:
    """
    Download prices via yfinance and return one adjusted-price column per ticker.

    Parameters
    ----------
    tickers : str or list/tuple of str
        Symbol(s) to download.
    start, end : passed straight through to ``yf.download``.
    auto_adjust : bool
        Forwarded to ``yf.download``. When True the "Close" field is read,
        otherwise the "Adj Close" field.

    Returns
    -------
    DataFrame whose columns are the requested tickers (in the requested order),
    with rows that are NaN for every ticker removed.

    Raises
    ------
    KeyError
        If the expected price field is not present in the downloaded frame.
    """
    raw = yf.download(tickers, start=start, end=end, auto_adjust=auto_adjust, progress=False)
    price_key = "Close" if auto_adjust else "Adj Close"

    if isinstance(raw.columns, pd.MultiIndex):
        if price_key in raw.columns.get_level_values(0):
            # Field-first layout: level 0 = field, level 1 = ticker.
            out = raw[price_key].copy()
        elif raw.columns.nlevels > 1 and price_key in raw.columns.get_level_values(1):
            # Ticker-first layout: level 0 = ticker, level 1 = field.
            # (The previous fallback re-queried level 0 and could never succeed.)
            out = raw.xs(price_key, axis=1, level=1, drop_level=True).copy()
        else:
            raise KeyError(f"Expected column '{price_key}' not found. Columns: {list(raw.columns)}. "
                           f"Try switching auto_adjust to {not auto_adjust}.")
    else:
        # single-ticker (no MultiIndex)
        if price_key not in raw.columns:
            raise KeyError(f"Expected column '{price_key}' not found. Columns: {list(raw.columns)}. "
                           f"Try switching auto_adjust to {not auto_adjust}.")
        name = tickers if isinstance(tickers, str) else tickers[0]
        out = raw[[price_key]].rename(columns={price_key: name})

    # Enforce the caller's ticker order; tickers missing from the download
    # come back as all-NaN columns rather than raising.
    cols = tickers if isinstance(tickers, (list, tuple)) else [tickers]
    out = out.reindex(columns=cols).dropna(how="all")
    return out

def load_ohlc(tickers, start, end=None, auto_adjust=True) -> pd.DataFrame:
    """
    Return OHLC (and Volume) in a tidy MultiIndex df: columns = (field, ticker).
    Needed only if using Garman–Klass realized variance.

    Parameters
    ----------
    tickers : str or list/tuple of str
        Symbol(s) to download. A one-element list is accepted.
    start, end, auto_adjust : passed straight through to ``yf.download``.

    Raises
    ------
    ValueError
        If none of the usual OHLCV field names appear on column level 0.
    """
    raw = yf.download(tickers, start=start, end=end, auto_adjust=auto_adjust, progress=False)
    if not isinstance(raw.columns, pd.MultiIndex):
        # Single ticker: promote to MultiIndex to keep interface consistent.
        # `tickers` may be a list such as ["SPY"]; a list cannot be used as a
        # dict key, so reduce it to the scalar symbol first.
        name = tickers if isinstance(tickers, str) else tickers[0]
        raw = pd.concat({name: raw}, axis=1).swaplevel(0, 1, axis=1).sort_index(axis=1)
    # Sanity check: at least one known field name must be on level 0
    expected = {"Open", "High", "Low", "Close", "Adj Close", "Volume"}
    if not expected.intersection(raw.columns.get_level_values(0)):
        raise ValueError("Unexpected columns from yfinance for OHLC download.")
    return raw

def align_indices(series_list) -> pd.DatetimeIndex:
    """Return the index labels shared by every series in ``series_list``."""
    shared = series_list[0].index
    position = 1
    # Narrow the running intersection with each remaining series in turn.
    while position < len(series_list):
        shared = shared.intersection(series_list[position].index)
        position += 1
    return shared

def realized_variance_close_to_close(prices: pd.Series, window: int) -> pd.Series:
    """Rolling sample variance of daily log returns, scaled by 252 to annualize."""
    log_returns = np.log(prices).diff()
    rolling_var = log_returns.rolling(window).var()
    return rolling_var * 252.0

def realized_variance_garman_klass(ohlc: pd.DataFrame, ticker: str, window: int) -> pd.Series:
    """
    Garman–Klass realized variance for one ticker, annualized with a 252 factor.

    Per-day estimate:
        0.5 * ln(High/Low)^2 - (2*ln(2) - 1) * ln(Close/Open)^2
    The daily estimates are averaged over a rolling ``window`` of rows.
    ``ohlc`` is indexed by (field, ticker) column tuples.
    """
    high, low = ohlc[("High", ticker)], ohlc[("Low", ticker)]
    open_, close = ohlc[("Open", ticker)], ohlc[("Close", ticker)]

    range_part = 0.5 * (np.log(high / low)) ** 2
    body_part = (2 * np.log(2) - 1) * (np.log(close / open_)) ** 2

    daily_estimate = range_part - body_part
    return daily_estimate.rolling(window).mean() * 252.0

def ewma_var_from_returns(rets: pd.Series, lam: float) -> pd.Series:
    """Exponentially weighted mean of squared returns (alpha = 1 - lam), times 252."""
    squared = rets.pow(2)
    smoothing = 1 - lam
    smoothed = squared.ewm(alpha=smoothing).mean()
    return smoothed * 252.0

def kpis(daily_returns: pd.Series) -> Dict[str, float]:
    """
    Summarize a daily simple-return series after dropping NaNs.

    Returns a dict with keys "CAGR", "Vol", "Sharpe", "MaxDD", assuming 252
    periods per year. Entries that cannot be computed (no observations, or a
    single observation for Vol/Sharpe) are NaN.
    """
    clean = daily_returns.dropna()
    n_obs = len(clean)
    equity = (1 + clean).cumprod()

    if n_obs > 0:
        cagr = equity.iloc[-1] ** (252 / n_obs) - 1
        maxdd = (equity / equity.cummax() - 1).min()
    else:
        cagr = np.nan
        maxdd = np.nan

    # A standard deviation needs at least two observations.
    vol = clean.std() * np.sqrt(252) if n_obs > 1 else np.nan

    if np.isfinite(vol):
        # Small epsilon keeps the ratio defined when volatility is exactly zero.
        sharpe = (clean.mean() * 252) / (vol + 1e-12)
    else:
        sharpe = np.nan

    return {"CAGR": float(cagr), "Vol": float(vol), "Sharpe": float(sharpe), "MaxDD": float(maxdd)}

# ----------------------------- Core Pipeline -----------------------------
def build_vrp_strategy(cfg: Config) -> Tuple[pd.DataFrame, Dict[str, Dict[str, float]]]:
    """
    Build the VRP-percentile timed equity/cash strategy described by ``cfg``.

    Steps: download prices, compute realized variance (close-to-close or
    Garman–Klass), derive implied variance from the VIX level, rank the
    difference (VRP) in a rolling window, map the percentile to a 0 / 0.5 / 1
    equity weight, scale by an EWMA volatility target, rebalance on the
    ``cfg.rebalance`` schedule, and charge costs on rebalance dates.

    Returns
    -------
    (out, metrics)
        ``out``: daily DataFrame with prices, RV, IV, VRP, percentile, weights
        and gross/net returns (rows containing any NaN are dropped).
        ``metrics``: ``{"Strategy": kpis(net), "BuyHold_SPY": kpis(equity returns)}``.
    """
    # 1) Prices (adjusted)
    px = load_adjusted_close([cfg.equity, cfg.cash, cfg.vix], start=cfg.start, end=cfg.end, auto_adjust=cfg.auto_adjust)
    spy = px[cfg.equity].dropna()
    shy = px[cfg.cash].dropna()
    vix = px[cfg.vix].dropna()

    # Align index across all three
    idx = align_indices([spy, shy, vix])
    spy, shy, vix = spy.reindex(idx), shy.reindex(idx), vix.reindex(idx)

    # 2) Realized variance (choose CC or GK)
    if cfg.use_gk:
        ohlc = load_ohlc([cfg.equity], start=cfg.start, end=cfg.end, auto_adjust=cfg.auto_adjust)
        # Align OHLC rows to the common dates. The row index has a single
        # level, so no `level=` argument applies here.
        ohlc = ohlc.reindex(idx)
        rv = realized_variance_garman_klass(ohlc, cfg.equity, cfg.rv_window)
    else:
        rv = realized_variance_close_to_close(spy, cfg.rv_window)

    # 3) Implied variance from VIX (annualized): VIX is quoted in vol points
    iv = (vix / 100.0)**2

    # 4) VRP and rolling percentile
    vrp = (iv - rv).dropna()
    vrp_pct = vrp.rolling(cfg.pct_lookback, min_periods=cfg.rv_window*6).rank(pct=True)

    # 5) Discrete policy from percentile (NaN percentiles stay at weight 0)
    w_spy = pd.Series(0.0, index=vrp_pct.index)
    w_spy[vrp_pct >= 0.60] = 1.0
    w_spy[(vrp_pct >= 0.40) & (vrp_pct < 0.60)] = 0.5

    # 6) EWMA target-vol scaling (scale and final weight both capped at 1.5)
    r_spy_log = np.log(spy).diff()
    ewma_var = ewma_var_from_returns(r_spy_log, cfg.lam).reindex(w_spy.index).ffill()
    scale = (cfg.target_vol / np.sqrt(ewma_var)).clip(upper=1.5)
    w_spy = (w_spy * scale).clip(0, 1.5)

    # 7) Periodic rebalance → daily weights (forward-fill)
    # Resample labels are calendar bin edges (e.g. every Friday). If that date
    # is a market holiday it is absent from `idx`, and reindexing on the raw
    # label would silently drop both the rebalance and its cost. Stamp each
    # bin with the last actual trading day it contains instead.
    trade_days = pd.Series(w_spy.index, index=w_spy.index)
    weekly = pd.DataFrame({
        "w": w_spy.resample(cfg.rebalance).last(),
        "day": trade_days.resample(cfg.rebalance).last(),
    }).dropna()
    # Skip a trailing bin whose label lies beyond the data: it is incomplete.
    weekly = weekly[weekly.index <= idx.max()]
    w_week = weekly.set_index("day")["w"]

    w_daily_spy = w_week.reindex(idx).ffill().fillna(0.0)
    # The cash leg is the complement of the equity leg (fully in cash before
    # the first rebalance; negative when the equity weight exceeds 1).
    w_daily_shy = 1.0 - w_daily_spy

    # 8) Daily returns & transaction costs (only on rebalance dates)
    ret_spy = spy.pct_change().fillna(0.0)
    ret_shy = shy.pct_change().fillna(0.0)
    # shift(): a weight set at day t's close earns returns from day t+1 on.
    gross = w_daily_spy.shift().fillna(0.0)*ret_spy + w_daily_shy.shift().fillna(0.0)*ret_shy

    turnover_week = w_week.diff().abs().fillna(0.0)
    # cfg.tc_bps is a decimal fraction per unit of |Δweight|.
    tc_daily = turnover_week.reindex(idx).fillna(0.0) * cfg.tc_bps
    net = gross - tc_daily

    # 9) Metrics
    metrics = {
        "Strategy": kpis(net),
        "BuyHold_SPY": kpis(ret_spy),
    }

    # 10) Collect outputs
    out = pd.DataFrame({
        "SPY": spy,
        "SHY": shy,
        "VIX": vix,
        "RV": rv.reindex(idx),
        "IV": iv.reindex(idx),
        "VRP": vrp.reindex(idx),
        "VRP_pct": vrp_pct.reindex(idx),
        "w_SPY": w_daily_spy,
        "w_SHY": w_daily_shy,
        "ret_gross": gross,
        "ret_net": net,
    }).dropna()

    return out, metrics

# ----------------------------- Main -----------------------------
def main():
    """Run the strategy with the module-level CFG, write CSV/JSON artifacts, print metrics."""
    # Output locations (relative to the current working directory)
    ts_path = "level38_vrp_timeseries.csv"
    m_path = "level38_metrics.json"

    timeseries, metrics = build_vrp_strategy(CFG)

    # Persist the daily time series and the metrics table
    timeseries.to_csv(ts_path, index=True)
    metrics_frame = pd.DataFrame(metrics)
    metrics_frame.to_json(m_path, indent=2)

    print(f"[OK] Saved timeseries → {ts_path}")
    print(f"[OK] Saved metrics   → {m_path}")
    print("\nMetrics summary:")
    for k in metrics:
        rounded = {kk: round(vv, 4) for kk, vv in metrics[k].items()}
        print(f"  {k}: ", rounded)


# Script entry point
if __name__ == "__main__":
    main()


[OK] Saved timeseries → level38_vrp_timeseries.csv
[OK] Saved metrics   → level38_metrics.json

Metrics summary:
  Strategy:  {'CAGR': 0.0688, 'Vol': 0.0947, 'Sharpe': 0.7499, 'MaxDD': -0.1803}
  BuyHold_SPY:  {'CAGR': 0.1069, 'Vol': 0.1909, 'Sharpe': 0.6275, 'MaxDD': -0.5519}
