# In [2]:  (notebook cell marker left over from the Jupyter export)
# level95_var_es_backtest.py
# Level-95: VaR + ES Backtesting (Kupiec + Christoffersen) on a Portfolio (free-data)
#
# Computes rolling Historical VaR/ES and evaluates:
#  - Kupiec POF (unconditional coverage)
#  - Christoffersen independence test
#  - Christoffersen conditional coverage test
#
# Outputs:
#   - level95_var_es_series.csv
#   - level95_var_es_backtest_summary.json
#
# Run:
#   python level95_var_es_backtest.py
#   python level95_var_es_backtest.py --symbols SPY QQQ IWM --alpha 0.01 --window 750
#   python level95_var_es_backtest.py --weights 0.5 0.3 0.2 --symbols SPY QQQ TLT

import os
import json
import math
import argparse
from dataclasses import dataclass, asdict
from typing import Tuple, List, Dict, Optional

import numpy as np
import pandas as pd
import yfinance as yf


# ----------------------------- Config -----------------------------
@dataclass
class Config:
    """Settings for one rolling historical VaR/ES backtest run.

    The defaults declared here are also the command-line defaults:
    ``parse_args`` reads them straight off the class attributes.
    """

    # Tickers to download; the portfolio gets one return column per symbol.
    symbols: Tuple[str, ...] = ("SPY", "QQQ", "IWM", "EFA", "EEM", "TLT", "LQD", "GLD")
    # Start date handed to the price download.
    start: str = "2010-01-01"

    alpha: float = 0.05          # tail probability (e.g., 0.01 or 0.05)
    window: int = 756            # rolling window for historical VaR/ES (~3y trading days)

    # True -> log returns; False -> simple percentage returns.
    use_log_returns: bool = True
    # True -> drop every date on which any symbol's return is missing.
    dropna: bool = True

    # portfolio
    # When given, the pipeline rescales the weights so they sum to 1.
    weights: Optional[Tuple[float, ...]] = None  # if None -> equal weight

    # Passed to np.random.seed at the start of the pipeline.
    seed: int = 42

    # Destination of the per-date series (CSV) and of the summary (JSON).
    out_csv: str = "level95_var_es_series.csv"
    out_json: str = "level95_var_es_backtest_summary.json"


# ----------------------------- Robust yfinance loader -----------------------------
def _extract_close_series(px: pd.DataFrame, symbol: str) -> pd.Series:
    """Return the close-price column for ``symbol`` as a Series named ``symbol``.

    ``Adj Close`` is preferred over ``Close``. Both flat columns and two-level
    (MultiIndex) columns are supported, in either ``(field, ticker)`` or
    ``(ticker, field)`` order.

    Args:
        px: Price frame as returned by the downloader.
        symbol: Ticker whose close column should be extracted.

    Returns:
        A copy of the matching column, renamed to ``symbol``.

    Raises:
        RuntimeError: If ``px`` is ``None``/empty or no close column is found.
    """
    if px is None or px.empty:
        raise RuntimeError(f"No data returned for {symbol}")

    def _named(selection) -> pd.Series:
        # Duplicate labels (or a partial MultiIndex key) make the selection a
        # DataFrame; keep its first column in that case.
        out = selection.copy()
        if isinstance(out, pd.DataFrame):
            out = out.iloc[:, 0]
        out.name = symbol
        return out

    if isinstance(px.columns, pd.MultiIndex):
        exact_keys = (
            ("Adj Close", symbol),
            ("Close", symbol),
            (symbol, "Adj Close"),
            (symbol, "Close"),
        )
        for key in exact_keys:
            if key in px.columns:
                return _named(px[key])

        # Case-insensitive fallback. The ticker must match one level exactly
        # (a substring test would let "C" match the word "close" and return
        # another ticker's prices) and "close" must appear in a different
        # level, whichever order the levels come in.
        wanted = symbol.strip().lower()
        for col in px.columns:
            levels = [str(part).strip().lower() for part in col]
            for pos, level in enumerate(levels):
                if level != wanted:
                    continue
                others = levels[:pos] + levels[pos + 1:]
                if any("close" in other for other in others):
                    return _named(px[col])

        raise RuntimeError(f"Could not extract Close/Adj Close for {symbol} from MultiIndex columns.")

    for col in ["Adj Close", "Close"]:
        if col in px.columns:
            return _named(px[col])

    raise RuntimeError(f"Missing Close/Adj Close for {symbol}. Columns={list(px.columns)}")


def load_prices(symbols: Tuple[str, ...], start: str) -> pd.DataFrame:
    """Download prices and return one close-price column per symbol.

    A single batched ``yf.download`` call is tried first. If it raises,
    returns nothing, or any symbol's close column cannot be extracted from
    it, every symbol is downloaded on its own instead. Errors raised during
    that second pass propagate to the caller.

    Args:
        symbols: Tickers to download.
        start: Start date passed through to ``yf.download``.

    Returns:
        A date-sorted frame with one column per symbol, in ``symbols`` order.
    """
    tickers = tuple(symbols)

    try:
        batch = yf.download(list(tickers), start=start, progress=False, group_by="column", auto_adjust=False)
        if batch is not None and not batch.empty:
            closes = [_extract_close_series(batch, ticker) for ticker in tickers]
            if closes:
                return pd.concat(closes, axis=1).sort_index()
    except Exception:
        # Deliberate best-effort: any batch failure simply falls through to
        # the per-symbol path below.
        pass

    per_symbol = [
        _extract_close_series(
            yf.download(ticker, start=start, progress=False, auto_adjust=False),
            ticker,
        )
        for ticker in tickers
    ]
    return pd.concat(per_symbol, axis=1).sort_index()


def compute_returns(prices: pd.DataFrame, use_log: bool) -> pd.DataFrame:
    prices = prices.copy()
    prices = prices.replace([np.inf, -np.inf], np.nan)

    if use_log:
        rets = np.log(prices).diff()
    else:
        rets = prices.pct_change()

    rets = rets.replace([np.inf, -np.inf], np.nan)
    rets = rets.dropna(how="all")
    return rets


# ----------------------------- VaR / ES -----------------------------
def rolling_historical_var_es(r: pd.Series, window: int, alpha: float) -> pd.DataFrame:
    """Compute rolling historical VaR and ES for a return series.

    Convention:
      - VaR is the alpha-quantile return (typically negative).
      - ES is the average of the window returns <= VaR (also negative).

    The estimate stored at position ``t`` uses only the ``window``
    observations before ``t`` (``t`` itself is excluded), so it is an
    out-of-sample forecast for ``t``. The first ``window`` rows are NaN.

    Args:
        r: Return series; NaNs are dropped before any computation.
        window: Number of past observations per estimate (>= 1).
        alpha: Tail probability in [0, 1].

    Returns:
        Frame with columns ``VaR`` and ``ES`` indexed like ``r.dropna()``.

    Raises:
        ValueError: If ``window`` < 1 or ``alpha`` is outside [0, 1].
    """
    # A non-positive window would otherwise run the loop over empty windows
    # or negative positions and produce nonsense instead of an error.
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window!r}")
    if not 0.0 <= alpha <= 1.0:
        raise ValueError(f"alpha must lie in [0, 1], got {alpha!r}")

    r = r.dropna()
    vals = r.values
    n = len(vals)

    var_arr = np.full(n, np.nan, dtype=float)
    es_arr = np.full(n, np.nan, dtype=float)

    for t in range(window, n):
        w = vals[t - window: t]          # strictly before t: no look-ahead
        q = float(np.quantile(w, alpha))
        tail = w[w <= q]
        var_arr[t] = q
        es_arr[t] = float(tail.mean()) if tail.size else np.nan

    return pd.DataFrame({"VaR": var_arr, "ES": es_arr}, index=r.index)


# ----------------------------- Backtests -----------------------------
def _safe_log(x: float) -> float:
    """Natural log of ``x`` with the argument floored at 1e-15.

    The floor keeps zero or negative probabilities from raising a math
    domain error.
    """
    floor = 1e-15
    clipped = floor if floor > x else x
    return math.log(clipped)


def kupiec_pof(exceed: np.ndarray, alpha: float) -> Dict[str, float]:
    """Kupiec Proportion of Failures (POF) test of unconditional coverage.

    Compares the observed exceedance frequency with the nominal tail
    probability ``alpha`` through a likelihood ratio:

        LR = -2 [ ln((1-a)^(T-x) a^x) - ln((1-phat)^(T-x) phat^x) ]

    Args:
        exceed: 0/1 indicators, 1 where the loss exceeded VaR
            (i.e. return < VaR). Any array-like is accepted.
        alpha: Nominal exceedance probability.

    Returns:
        Dict with ``LR_pof``, ``p_value`` (chi-square, 1 degree of freedom),
        ``T`` (observations), ``x`` (exceedances) and ``hit_rate`` (x / T),
        all as floats. With no observations the statistics are NaN.
    """
    exceed = np.asarray(exceed).astype(int)
    T = int(exceed.size)
    x = int(exceed.sum())

    if T == 0:
        nan = float("nan")
        return {"LR_pof": nan, "p_value": nan, "T": 0.0, "x": 0.0, "hit_rate": nan}

    phat = x / T
    ll0 = (T - x) * _safe_log(1 - alpha) + x * _safe_log(alpha)   # under H0: p = alpha
    ll1 = (T - x) * _safe_log(1 - phat) + x * _safe_log(phat)     # at the MLE: p = phat
    LR = -2.0 * (ll0 - ll1)

    # The statistic is non-negative in exact arithmetic. Rounding can push it
    # a hair below zero, which would turn the p-value into NaN, so clamp.
    # (A NaN statistic fails the comparison and is passed through unchanged.)
    if LR < 0.0:
        LR = 0.0

    p = chi2_sf(LR, df=1)

    return {"LR_pof": float(LR), "p_value": float(p), "T": float(T), "x": float(x), "hit_rate": float(phat)}


def christoffersen_independence(exceed: np.ndarray) -> Dict[str, float]:
    """Christoffersen independence test for exceedances (2-state Markov).

    Counts the transitions between consecutive indicator values and compares
    a first-order Markov model (separate exceedance probabilities after a
    0 and after a 1) with an i.i.d. model (a single probability).

    Args:
        exceed: 0/1 exceedance indicators in time order. Any array-like is
            accepted.

    Returns:
        Dict with ``LR_ind``, ``p_value`` (chi-square, 1 degree of freedom)
        and the transition counts ``n00``, ``n01``, ``n10``, ``n11``. With
        fewer than two observations only NaN ``LR_ind``/``p_value`` are
        returned.
    """
    exceed = np.asarray(exceed).astype(int)
    if exceed.size < 2:
        return {"LR_ind": float("nan"), "p_value": float("nan")}

    x_prev = exceed[:-1]
    x_curr = exceed[1:]

    n00 = int(((x_prev == 0) & (x_curr == 0)).sum())
    n01 = int(((x_prev == 0) & (x_curr == 1)).sum())
    n10 = int(((x_prev == 1) & (x_curr == 0)).sum())
    n11 = int(((x_prev == 1) & (x_curr == 1)).sum())

    # Transition probabilities; max(..., 1) avoids 0/0 when a state never occurs.
    p01 = n01 / max(n00 + n01, 1)
    p11 = n11 / max(n10 + n11, 1)
    p1 = (n01 + n11) / max(n00 + n01 + n10 + n11, 1)

    ll_markov = (
        n00 * _safe_log(1 - p01) + n01 * _safe_log(p01) +
        n10 * _safe_log(1 - p11) + n11 * _safe_log(p11)
    )
    ll_iid = (n00 + n10) * _safe_log(1 - p1) + (n01 + n11) * _safe_log(p1)

    LR = -2.0 * (ll_iid - ll_markov)

    # The two log-likelihoods are summed in different orders, so when the
    # models coincide rounding can leave a tiny negative statistic, which
    # would turn the p-value into NaN. Clamp it to the theoretical bound.
    if LR < 0.0:
        LR = 0.0

    p = chi2_sf(LR, df=1)

    return {"LR_ind": float(LR), "p_value": float(p), "n00": float(n00), "n01": float(n01), "n10": float(n10), "n11": float(n11)}


def christoffersen_conditional_coverage(exceed: np.ndarray, alpha: float) -> Dict[str, float]:
    """Christoffersen conditional-coverage test.

    The statistic is the sum of the Kupiec POF and the independence
    statistics, referred to a chi-square distribution with 2 degrees of
    freedom. If either component is not finite, both outputs are NaN.

    Returns:
        Dict with ``LR_cc`` and ``p_value``.
    """
    lr_uc = kupiec_pof(exceed, alpha)["LR_pof"]
    lr_ind = christoffersen_independence(exceed)["LR_ind"]

    if np.isfinite(lr_uc) and np.isfinite(lr_ind):
        lr_cc = float(lr_uc + lr_ind)
        return {"LR_cc": lr_cc, "p_value": float(chi2_sf(lr_cc, df=2))}

    return {"LR_cc": float("nan"), "p_value": float("nan")}


# ----------------------------- Chi-square survival function (no scipy) -----------------------------
def chi2_sf(x: float, df: int) -> float:
    """Survival function ``P(X > x)`` of a chi-square variable (no scipy).

    Exact for every positive integer ``df``. With ``y = x / 2`` the survival
    function is the regularised upper incomplete gamma ``Q(df / 2, y)``,
    evaluated from its closed-form starting values

      - df = 1: ``Q(1/2, y) = erfc(sqrt(y))``
      - df = 2: ``Q(1, y)   = exp(-y)``

    and the recurrence ``Q(a + 1, y) = Q(a, y) + y**a * exp(-y) / Gamma(a + 1)``.

    Args:
        x: Value of the test statistic.
        df: Degrees of freedom, a positive integer.

    Returns:
        The upper-tail probability, or NaN when ``x`` is negative or not finite.

    Raises:
        ValueError: If ``df`` is not a positive integer.
    """
    if df < 1 or int(df) != df:
        raise ValueError(f"df must be a positive integer, got {df!r}")
    if x < 0 or not np.isfinite(x):
        return float("nan")

    y = x / 2.0
    if int(df) % 2:
        a = 0.5
        sf = math.erfc(math.sqrt(y))
    else:
        a = 1.0
        sf = math.exp(-y)

    # Each step raises the shape parameter by one, i.e. df by two.
    for _ in range((int(df) - 1) // 2):
        if y > 0.0:
            # Evaluate the term in log space so large y or a cannot overflow.
            sf += math.exp(a * math.log(y) - y - math.lgamma(a + 1.0))
        a += 1.0

    # Guard against rounding a hair above 1.
    return float(min(sf, 1.0))


# ----------------------------- Pipeline -----------------------------
def run_pipeline(cfg: Config) -> Dict[str, object]:
    """Run the full backtest: prices -> returns -> portfolio -> VaR/ES -> tests.

    Args:
        cfg: Run settings.

    Returns:
        Dict with two entries: ``"series"`` (frame of portfolio return, VaR,
        ES and the 0/1 exceedance flag per date) and ``"summary"`` (nested
        dict of config, data window, portfolio statistics and test results).

    Raises:
        RuntimeError: If fewer than ``window + 50`` return rows remain after
            cleaning, if the number of weights differs from the number of
            return columns, or if the weights sum to zero or a non-finite
            value.
    """
    np.random.seed(cfg.seed)

    print(f"[INFO] Downloading prices for {cfg.symbols} from {cfg.start} ...")
    asset_rets = compute_returns(load_prices(cfg.symbols, cfg.start), cfg.use_log_returns)
    if cfg.dropna:
        asset_rets = asset_rets.dropna(how="any")

    if asset_rets.empty or len(asset_rets) < cfg.window + 50:
        raise RuntimeError(f"Not enough data after cleaning. rows={len(asset_rets)} window={cfg.window}")

    # Portfolio weights: equal weight unless given, always normalised to sum to 1.
    n_assets = asset_rets.shape[1]
    if cfg.weights is None:
        weights = np.ones(n_assets) / n_assets
    elif len(cfg.weights) != n_assets:
        raise RuntimeError(f"--weights length {len(cfg.weights)} must match symbols length {n_assets}")
    else:
        raw = np.array(cfg.weights, dtype=float)
        total = raw.sum()
        if total == 0 or not np.isfinite(total):
            raise RuntimeError("Weights sum to 0 or invalid.")
        weights = raw / total

    port = pd.Series(asset_rets.values @ weights, index=asset_rets.index, name="port_ret")

    # Rolling VaR/ES, then keep only the dates on which both are available.
    risk = rolling_historical_var_es(port, window=cfg.window, alpha=cfg.alpha)
    df = pd.concat([port, risk], axis=1).dropna()

    # Exceedance: the realised return fell below the VaR forecast for that date.
    exceed = (df["port_ret"].values < df["VaR"].values).astype(int)

    pof = kupiec_pof(exceed, cfg.alpha)
    ind = christoffersen_independence(exceed)
    cc = christoffersen_conditional_coverage(exceed, cfg.alpha)

    # Annualised with 252 periods per year.
    realised = df["port_ret"]
    ann_ret = float(realised.mean() * 252.0)
    ann_vol = float(realised.std(ddof=1) * math.sqrt(252.0))
    if ann_vol > 0:
        sharpe = float(ann_ret / ann_vol)
    else:
        sharpe = float("nan")

    data_window = {
        "start": str(df.index.min().date()),
        "end": str(df.index.max().date()),
        "n_obs": int(len(df)),
        "window": int(cfg.window),
    }
    portfolio = {
        "symbols": list(cfg.symbols),
        "weights": [float(x) for x in weights],
        "ann_ret": ann_ret,
        "ann_vol": ann_vol,
        "sharpe": sharpe,
    }
    backtests = {
        "kupiec_pof": pof,
        "christoffersen_independence": ind,
        "christoffersen_conditional_coverage": cc,
    }
    notes = [
        "Exceedance is defined as return < VaR(alpha) where VaR is rolling historical alpha-quantile.",
        "p-values are chi-square approximations with df=1 (POF, IND) and df=2 (CC).",
        "If p-value is small (<0.05), model may fail that test."
    ]
    summary = {
        "config": asdict(cfg),
        "data_window": data_window,
        "portfolio": portfolio,
        "backtests": backtests,
        "notes": notes,
    }

    return {"series": df.assign(exceed=exceed), "summary": summary}


def save_outputs(result: Dict[str, object], cfg: Config) -> None:
    """Write the series CSV and summary JSON, then print the test results.

    Parent directories of both output paths are created when missing.

    Args:
        result: Mapping with a ``"series"`` frame and a ``"summary"`` dict.
        cfg: Supplies ``out_csv``, ``out_json`` and ``alpha``.
    """
    series: pd.DataFrame = result["series"]  # type: ignore
    summary: Dict = result["summary"]        # type: ignore

    for target in (cfg.out_csv, cfg.out_json):
        parent = os.path.dirname(target)
        # A bare file name has no directory part; use the current directory.
        os.makedirs(parent if parent else ".", exist_ok=True)

    series.to_csv(cfg.out_csv, index=True)
    with open(cfg.out_json, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, indent=2)

    print(f"[OK] Saved series → {cfg.out_csv}")
    print(f"[OK] Saved summary → {cfg.out_json}")

    backtests = summary["backtests"]

    kupiec = backtests["kupiec_pof"]
    print(
        f"[BT] Kupiec POF: LR={kupiec['LR_pof']:.3f}, p={kupiec['p_value']:.4f}, "
        f"hit_rate={kupiec['hit_rate']:.4f} (target={cfg.alpha})"
    )

    independence = backtests["christoffersen_independence"]
    print(
        f"[BT] Christoffersen IND: LR={independence['LR_ind']:.3f}, "
        f"p={independence['p_value']:.4f}"
    )

    coverage = backtests["christoffersen_conditional_coverage"]
    print(
        f"[BT] Christoffersen CC: LR={coverage['LR_cc']:.3f}, "
        f"p={coverage['p_value']:.4f}"
    )


# ----------------------------- CLI -----------------------------
def parse_args() -> Config:
    """Parse the command line into a ``Config``.

    Every option defaults to the corresponding ``Config`` class attribute.
    ``--simple-returns`` and ``--no-dropna`` are switches that turn off
    ``use_log_returns`` and ``dropna`` respectively.
    """
    parser = argparse.ArgumentParser(description="Level-95: Rolling Historical VaR/ES + Kupiec/Christoffersen backtests")

    # Data selection
    parser.add_argument("--start", type=str, default=Config.start)
    parser.add_argument("--symbols", nargs="+", default=list(Config.symbols))

    # Risk-model parameters
    parser.add_argument("--alpha", type=float, default=Config.alpha)
    parser.add_argument("--window", type=int, default=Config.window)

    # Return construction switches
    parser.add_argument("--simple-returns", action="store_true")
    parser.add_argument("--no-dropna", action="store_true")

    # Portfolio weights (omitted -> equal weight)
    parser.add_argument("--weights", nargs="+", type=float, default=None)

    parser.add_argument("--seed", type=int, default=Config.seed)

    # Output locations
    parser.add_argument("--csv", type=str, default=Config.out_csv)
    parser.add_argument("--json", type=str, default=Config.out_json)

    ns = parser.parse_args()

    if ns.weights is None:
        weights = None
    else:
        weights = tuple(ns.weights)

    settings = dict(
        symbols=tuple(ns.symbols),
        start=ns.start,
        alpha=float(ns.alpha),
        window=int(ns.window),
        use_log_returns=(not ns.simple_returns),
        dropna=(not ns.no_dropna),
        weights=weights,
        seed=int(ns.seed),
        out_csv=ns.csv,
        out_json=ns.json,
    )
    return Config(**settings)


def main() -> None:
    """Command-line entry point: parse options, run the backtest, save results."""
    settings = parse_args()
    save_outputs(run_pipeline(settings), settings)


if __name__ == "__main__":
    # Notebook/IDE launchers append "-f <kernel connection file>.json" to the
    # command line; drop those tokens so argparse does not reject them.
    import sys

    passthrough = [
        token for token in sys.argv[1:]
        if not (token == "-f" or (token.endswith(".json") and "kernel" in token))
    ]
    sys.argv = [sys.argv[0]] + passthrough
    main()


# Captured output of one run with the default configuration:
# [INFO] Downloading prices for ('SPY', 'QQQ', 'IWM', 'EFA', 'EEM', 'TLT', 'LQD', 'GLD') from 2010-01-01 ...
# [OK] Saved series → level95_var_es_series.csv
# [OK] Saved summary → level95_var_es_backtest_summary.json
# [BT] Kupiec POF: LR=0.115, p=0.7348, hit_rate=0.0487 (target=0.05)
# [BT] Christoffersen IND: LR=11.300, p=0.0008
# [BT] Christoffersen CC: LR=11.415, p=0.0033
