In [None]:
# level79_tcopula_var_es.py
# Level-79: t-Copula Tail-Risk Simulator (VaR/ES) using ONLY free data + numpy/pandas/yfinance.
#
# What this script does:
# 1) Downloads daily adjusted close prices for a basket of ETFs.
# 2) Computes daily log returns.
# 3) Fits (calibrates) a t-copula degrees-of-freedom (nu) over a grid using pseudo log-likelihood.
# 4) Simulates joint return scenarios via the fitted t-copula + empirical marginals.
# 5) Computes portfolio PnL distribution and VaR/ES at chosen alpha(s).
# 6) Saves a daily panel CSV + summary JSON.
#
# Key Fix Included:
# - Uses np.trapezoid if available; otherwise falls back to np.trapz.
#   (Prevents: AttributeError: module 'numpy' has no attribute 'trapzoid')
#
# Usage:
#   python level79_tcopula_var_es.py
#   python level79_tcopula_var_es.py --start 2010-01-01 --sims 50000 --alphas 0.95 0.99
#   python level79_tcopula_var_es.py --weights 0.25 0.25 0.25 0.25 0 0 0 0
#
# Notes:
# - This is intentionally scipy-free. Numerical integration + root find are implemented here.
# - It will run slower than scipy-based code (especially PPF), but it is portable.

import os
import json
import math
import argparse
from dataclasses import dataclass, asdict
from typing import List, Dict, Tuple, Optional

import numpy as np
import pandas as pd
import yfinance as yf


# ----------------------------- Config -----------------------------
@dataclass
class Config:
    """
    Run settings for the t-copula VaR/ES pipeline.

    Every field has a default, so `Config()` describes a complete run.
    """
    # Tickers downloaded one at a time; each becomes one column of the price panel.
    symbols: Tuple[str, ...] = ("SPY", "QQQ", "IWM", "EFA", "EEM", "TLT", "LQD", "GLD")
    # First date requested from the data provider.
    start: str = "2010-01-01"
    # Candidate degrees of freedom; calibration keeps the one with the highest pseudo log-likelihood.
    nu_grid: Tuple[int, ...] = (4, 6, 8, 10, 15, 20)
    corr_shrink: float = 0.05  # shrink corr toward identity to ensure PD
    # Number of simulated joint return scenarios.
    sims: int = 50000
    # Seed used for the simulation random generator.
    seed: int = 42

    # VaR/ES settings (confidence levels)
    # Each level is used as the quantile level of the simulated loss distribution.
    alphas: Tuple[float, ...] = (0.95, 0.99)

    # Output
    out_csv: str = "level79_tcopula_var_es.csv"
    out_json: str = "level79_tcopula_var_es_summary.json"

    # Portfolio settings
    # Explicit weights are rescaled by their sum before use.
    weights: Optional[List[float]] = None  # if None -> equal weight
    notional: float = 1_000_000.0          # portfolio notional for PnL in $
    # NOTE(review): only echoed into the summary; the simulation draws daily returns
    # and does not scale them by this value.
    horizon_days: int = 1                  # 1-day VaR/ES


# ----------------------------- Utilities -----------------------------
def trapz_compat(y: np.ndarray, x: np.ndarray) -> float:
    """
    Trapezoidal-rule integral of the samples `y` over the abscissae `x`.

    Uses `np.trapezoid` when the installed NumPy provides it and the older
    `np.trapz` otherwise, so the same call works with either name.
    The result is returned as a plain Python float.
    """
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz
    return float(integrate(y, x))


def ensure_pos_def_corr(corr: np.ndarray, shrink: float = 0.05) -> np.ndarray:
    """
    Return a repaired copy of a correlation matrix.

    The input is blended with the identity (weight `shrink` on the identity),
    symmetrised, its eigenvalues are floored at 1e-8, and the rebuilt matrix is
    rescaled so the diagonal is exactly 1. Off-diagonal entries are limited to
    [-0.9999, 0.9999]. The caller's array is not modified.
    """
    dim = corr.shape[0]
    blended = shrink * np.eye(dim) + (1.0 - shrink) * corr
    symmetric = 0.5 * (blended + blended.T)

    # Floor the spectrum so no eigenvalue is zero or negative.
    eigvals, eigvecs = np.linalg.eigh(symmetric)
    floored = np.maximum(eigvals, 1e-8)
    rebuilt = eigvecs @ np.diag(floored) @ eigvecs.T

    # Flooring perturbs the diagonal; divide by the implied standard deviations
    # to get back to a unit diagonal.
    scale = np.sqrt(np.diag(rebuilt))
    result = rebuilt / (scale[:, None] * scale[None, :])
    result = np.clip(result, -0.9999, 0.9999)
    np.fill_diagonal(result, 1.0)
    return result


def rank_to_uniform(x: np.ndarray) -> np.ndarray:
    """
    Map each column of a 2-D array to pseudo-uniform observations.

    Within a column, the k-th smallest value (k = 1..n) becomes k / (n + 1),
    so every output lies strictly between 0 and 1. A final clip to
    [1e-12, 1 - 1e-12] guards the boundaries.
    """
    n_obs = x.shape[0]
    pseudo = np.empty_like(x, dtype=float)
    for col in range(x.shape[1]):
        order = np.argsort(x[:, col])
        # The inverse permutation of `order` is each observation's zero-based rank.
        zero_based_rank = np.argsort(order)
        pseudo[:, col] = (zero_based_rank + 1.0) / (n_obs + 1.0)
    return np.clip(pseudo, 1e-12, 1.0 - 1e-12)


# ----------------------------- Data -----------------------------
def load_prices(symbols: Tuple[str, ...], start: str) -> pd.DataFrame:
    """
    Download adjusted close prices for each symbol and align them on common dates.

    Each symbol is requested separately with `auto_adjust=True`. The returned
    frame has one column per requested symbol (named exactly as requested),
    is sorted by date, and keeps only the dates on which every symbol has a price.

    Raises
    ------
    RuntimeError
        If a download comes back empty, has no 'Close' data, or yields more
        than one 'Close' column for a single symbol.
    """
    frames = []
    for s in symbols:
        px = yf.download(s, start=start, auto_adjust=True, progress=False)
        if px is None or px.empty:
            raise RuntimeError(f"No data returned for symbol: {s}")
        if "Close" not in px.columns:
            raise RuntimeError(f"'Close' column missing for {s}")

        close = px["Close"]
        if isinstance(close, pd.DataFrame):
            # With (field, ticker) MultiIndex columns, px["Close"] is a one-column
            # DataFrame rather than a Series; reduce it explicitly so the column
            # can be named after the requested symbol.
            if close.shape[1] != 1:
                raise RuntimeError(
                    f"Expected one 'Close' column for {s}, got {close.shape[1]}"
                )
            close = close.iloc[:, 0]

        frames.append(close.rename(s))

    prices = pd.concat(frames, axis=1).sort_index()
    return prices.dropna(how="any")


def compute_log_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """
    Row-over-row log returns of a price panel.

    Any row that contains a missing or infinite value in any column is
    discarded, which always includes the first row (it has no predecessor).
    """
    log_diff = np.log(prices).diff()
    # Turn infinities into NaN first so a single dropna removes both kinds of bad row.
    finite_or_nan = log_diff.replace([np.inf, -np.inf], np.nan)
    return finite_or_nan.dropna()


# ----------------------------- Student-t PDF/CDF/PPF (scipy-free) -----------------------------
def t_log_pdf(x: np.ndarray, nu: float) -> np.ndarray:
    """
    Log-density of the standard Student-t distribution (location 0, scale 1)
    with `nu` degrees of freedom, evaluated elementwise at `x`.

    log f(x) = lgamma((nu+1)/2) - lgamma(nu/2) - 0.5*log(nu*pi)
               - ((nu+1)/2) * log(1 + x^2/nu)
    """
    half_nu_plus_one = (nu + 1.0) / 2.0
    # Terms that do not depend on x.
    log_norm = math.lgamma(half_nu_plus_one) - math.lgamma(nu / 2.0)
    log_norm = log_norm - 0.5 * math.log(nu * math.pi)
    # log1p keeps precision when x^2/nu is tiny.
    return log_norm - half_nu_plus_one * np.log1p((x * x) / nu)


def t_cdf_scalar(x: float, nu: float, n_steps: int = 4001) -> float:
    """
    Student-t CDF at a single point, by trapezoidal quadrature of the density.

    The density is symmetric about zero, so only the mass between 0 and |x|
    is integrated (on `n_steps` equally spaced nodes). That mass is added to
    one half for positive `x` and subtracted from it otherwise. The result is
    kept inside [1e-12, 1 - 1e-12].
    """
    if x == 0.0:
        return 0.5

    reach = abs(x)
    nodes = np.linspace(0.0, reach, int(n_steps))
    density = np.exp(t_log_pdf(nodes, nu))
    half_mass = trapz_compat(density, nodes)

    if x > 0:
        prob = 0.5 + half_mass
    else:
        prob = 0.5 - half_mass

    # Keep the value strictly inside (0, 1) for downstream inversions.
    return float(np.clip(prob, 1e-12, 1.0 - 1e-12))


def t_ppf_scalar(u: float, nu: float) -> float:
    """
    Student-t quantile for a single probability, found by bisecting the
    numeric CDF.

    `u` is first limited to [1e-12, 1 - 1e-12]. Probabilities below one half
    are handled by symmetry (the negated quantile of 1 - u). For the upper
    half, a bracket [0, hi] is grown by doubling from hi = 10 (giving up once
    hi exceeds 200) and then halved 80 times.
    """
    prob = float(np.clip(u, 1e-12, 1.0 - 1e-12))
    if prob == 0.5:
        return 0.0
    if prob < 0.5:
        # Lower tail: mirror into the upper tail and flip the sign.
        return -t_ppf_scalar(1.0 - prob, nu)

    lower, upper = 0.0, 10.0

    # Grow the bracket until it contains the quantile or becomes too wide.
    while upper <= 200.0 and t_cdf_scalar(upper, nu) < prob:
        upper *= 2.0

    remaining = 80
    while remaining > 0:
        midpoint = 0.5 * (lower + upper)
        if t_cdf_scalar(midpoint, nu) < prob:
            lower = midpoint
        else:
            upper = midpoint
        remaining -= 1

    return 0.5 * (lower + upper)


def t_ppf(U: np.ndarray, nu: float) -> np.ndarray:
    """
    Elementwise Student-t quantile function (scipy-free).

    Parameters
    ----------
    U : array of probabilities, any shape.
    nu : degrees of freedom.

    Returns
    -------
    Float array with the same shape as `U`.

    Each scalar inversion is expensive (bisection over a numerically
    integrated CDF), so it is evaluated once per distinct probability and the
    result is scattered back. Rank-based pseudo-uniforms repeat the same
    values in every column, which makes this substantially cheaper while
    giving the same numbers. Empty input returns an empty array.
    """
    probs = np.asarray(U, dtype=float)
    # Flatten first so `inverse` is 1-D regardless of the NumPy version.
    distinct, inverse = np.unique(probs.ravel(), return_inverse=True)
    quantiles = np.array(
        [t_ppf_scalar(float(p), nu) for p in distinct], dtype=float
    )
    return quantiles[np.ravel(inverse)].reshape(probs.shape)


# ----------------------------- t-Copula Calibration -----------------------------
def pseudo_log_likelihood_tcopula(U: np.ndarray, corr: np.ndarray, nu: float) -> float:
    """
    Pseudo log-likelihood of a t-copula on pseudo-uniform observations.

    Parameters
    ----------
    U : (T x N) array of pseudo-uniforms.
    corr : (N x N) correlation matrix of the copula.
    nu : degrees of freedom.

    Returns
    -------
    Sum over the T rows of the log copula density

        log c(u) = log f_{nu,corr}(z) - sum_i log f_nu(z_i),   z_i = t_nu^{-1}(u_i)

    where f_{nu,corr} is the N-variate Student-t density and f_nu the
    univariate one.

    The multivariate normalising constant
        lgamma((nu+N)/2) - lgamma(nu/2) - (N/2) * log(nu*pi)
    depends on nu, so it is included: leaving it out would bias the
    comparison of likelihoods across different nu.

    If `corr` is not positive definite, it is repaired with a 0.10 shrink
    before the Cholesky factorisation is retried.
    """
    # Z: T x N
    Z = t_ppf(U, nu)

    # Cholesky factor for the quadratic form and the log-determinant.
    try:
        L = np.linalg.cholesky(corr)
    except np.linalg.LinAlgError:
        corr = ensure_pos_def_corr(corr, shrink=0.10)
        L = np.linalg.cholesky(corr)

    # With y = L^{-1} z, the quadratic form z^T corr^{-1} z equals ||y||^2.
    Y = np.linalg.solve(L, Z.T).T  # T x N
    q = np.sum(Y * Y, axis=1)      # length T
    n = Z.shape[1]

    # nu-dependent normalising constant of the N-variate t density.
    log_norm = (
        math.lgamma((nu + n) / 2.0)
        - math.lgamma(nu / 2.0)
        - 0.5 * n * math.log(nu * math.pi)
    )
    # log|corr| from the Cholesky diagonal.
    logdet = 2.0 * np.sum(np.log(np.diag(L)))
    mv_part = log_norm - 0.5 * logdet - ((nu + n) / 2.0) * np.log1p(q / nu)

    # Sum of the univariate t log-densities in each row.
    uni_sum = np.sum(t_log_pdf(Z, nu), axis=1)

    return float(np.sum(mv_part - uni_sum))


def calibrate_tcopula(rets: pd.DataFrame, nu_grid: Tuple[int, ...], corr_shrink: float) -> Dict:
    """
    Pick the t-copula degrees of freedom from a grid by pseudo log-likelihood.

    The returns are converted to rank-based pseudo-uniforms once. For every
    candidate nu, the uniforms are mapped to t-scores, the correlation of
    those scores is estimated (then shrunk and made positive definite), and
    the pseudo log-likelihood is evaluated.

    Returns
    -------
    dict with keys
        "nu"   : int, the best candidate;
        "pll"  : float, its pseudo log-likelihood;
        "corr" : the correlation matrix estimated for that candidate.
    If no candidate scores above -inf (for example an empty grid), "nu" and
    "corr" are None and "pll" is -inf.
    """
    U = rank_to_uniform(rets.values)

    best = {"nu": None, "pll": -np.inf, "corr": None}

    for nu in nu_grid:
        # The correlation is re-estimated from the t-scores implied by this nu.
        # NOTE: pseudo_log_likelihood_tcopula repeats the U -> Z transform internally.
        Z = t_ppf(U, float(nu))
        corr = ensure_pos_def_corr(np.corrcoef(Z, rowvar=False), shrink=corr_shrink)

        pll = pseudo_log_likelihood_tcopula(U, corr, float(nu))
        if pll > best["pll"]:
            best = {"nu": int(nu), "pll": float(pll), "corr": corr}

    return best


# ----------------------------- Simulation -----------------------------
def simulate_tcopula_returns(
    hist_rets: pd.DataFrame,
    corr: np.ndarray,
    nu: float,
    sims: int,
    seed: int
) -> np.ndarray:
    """
    Simulate joint returns from a t-copula combined with empirical marginals.

    Steps:
    1) Draw multivariate-t latents: y = (L g) / sqrt(w / nu), with g ~ N(0, I),
       w ~ ChiSq(nu) and L the Cholesky factor of `corr`.
    2) Map every latent to a uniform with the numeric Student-t CDF.
    3) Map every uniform to a historical return of the same asset through the
       empirical quantile function.

    Parameters
    ----------
    hist_rets : historical returns, one column per asset.
    corr : copula correlation matrix (columns in the same order as `hist_rets`).
    nu : degrees of freedom.
    sims : number of scenarios to draw.
    seed : seed for the random generator.

    Returns
    -------
    Array of shape (sims, n_assets) with simulated returns.
    """
    rng = np.random.default_rng(seed)
    n = hist_rets.shape[1]
    L = np.linalg.cholesky(ensure_pos_def_corr(corr, shrink=0.0))

    # Sorted history per asset = order statistics for the empirical inverse CDF.
    hist_sorted = np.sort(hist_rets.values, axis=0)
    T = hist_sorted.shape[0]

    # Correlated Gaussians.
    g = rng.standard_normal(size=(sims, n))
    z = g @ L.T

    # Chi-square(nu) drawn as Gamma(shape=nu/2, scale=2); one mixing variable
    # per scenario, shared by all assets.
    w = rng.gamma(shape=nu / 2.0, scale=2.0, size=(sims, 1))
    y = z / np.sqrt(w / nu)

    # Latent -> uniform via the scalar numeric t-CDF (slow but deterministic).
    U = np.empty_like(y, dtype=float)
    it = np.nditer(y, flags=["multi_index"])
    while not it.finished:
        U[it.multi_index] = t_cdf_scalar(float(it[0]), float(nu))
        it.iternext()

    # Empirical inverse CDF: index = floor(u * T), limited to T - 1.
    # Using T (not T - 1) as the multiplier gives every one of the T order
    # statistics the same probability 1/T; with T - 1 the largest historical
    # return could never be drawn.
    idx = np.floor(U * T).astype(int)
    idx = np.clip(idx, 0, T - 1)

    sim = np.empty_like(U, dtype=float)
    for j in range(n):
        sim[:, j] = hist_sorted[idx[:, j], j]

    return sim  # sims x n


# ----------------------------- Risk Metrics -----------------------------
def var_es(pnl: np.ndarray, alpha: float) -> Tuple[float, float]:
    """
    Empirical Value-at-Risk and Expected Shortfall of a PnL sample.

    `pnl` follows the convention profit > 0, loss < 0. Both results are quoted
    on the loss scale, so a loss is a positive number:
      - VaR: the `alpha`-quantile of the losses;
      - ES: the mean of all losses at or above that quantile
            (equal to the VaR if no such loss exists).

    Returns the pair (VaR, ES).
    """
    loss_sample = -pnl
    threshold = float(np.quantile(loss_sample, alpha))
    beyond = loss_sample[loss_sample >= threshold]
    if beyond.size == 0:
        return threshold, threshold
    return threshold, float(np.mean(beyond))


# ----------------------------- Pipeline -----------------------------
def run_pipeline(cfg: Config) -> Tuple[pd.DataFrame, Dict]:
    """
    Run the whole workflow: download prices, fit the t-copula, simulate
    scenarios and measure tail risk.

    Returns a pair (panel, summary):
      - panel: historical log returns per asset (columns prefixed "ret_") plus
        the weighted historical portfolio return in "port_ret_hist";
      - summary: nested dict with the sections "config", "calibration",
        "portfolio", "risk" (VaR/ES per alpha) and "data_window".

    Raises ValueError when cfg.weights has the wrong length, contains
    non-finite values, or sums to (nearly) zero.
    """
    np.random.seed(cfg.seed)

    print(f"[INFO] Downloading prices for {cfg.symbols} from {cfg.start} ...")
    prices = load_prices(cfg.symbols, cfg.start)
    rets = compute_log_returns(prices)
    print(f"[INFO] Got {len(prices)} price rows, {len(rets)} return rows, assets={rets.shape[1]}")

    # Degrees of freedom and correlation of the copula.
    print(f"[INFO] Calibrating t-copula nu over grid: {cfg.nu_grid} ...")
    fit = calibrate_tcopula(rets, cfg.nu_grid, cfg.corr_shrink)
    nu_hat = float(fit["nu"])
    corr_hat = fit["corr"]
    print(f"[INFO] Best nu={int(nu_hat)} (pseudo-LL={fit['pll']:.2f})")

    # Portfolio weights: user-supplied weights are validated and rescaled to
    # sum to one; otherwise every asset gets the same weight.
    n_assets = rets.shape[1]
    if cfg.weights is not None:
        w = np.asarray(cfg.weights, dtype=float)
        if w.size != n_assets:
            raise ValueError(f"--weights length must be {n_assets}, got {w.size}")
        if not np.isfinite(w).all():
            raise ValueError("weights contain non-finite values")
        total = float(np.sum(w))
        if abs(total) < 1e-12:
            raise ValueError("weights sum to zero")
        w = w / total
    else:
        w = np.ones(n_assets) / n_assets

    # Simulated daily returns -> portfolio PnL.
    # Small-return approximation: pnl ≈ notional * (w · r).
    sim_rets = simulate_tcopula_returns(rets, corr_hat, nu_hat, cfg.sims, cfg.seed)
    pnl = cfg.notional * (sim_rets @ w)

    # One VaR/ES pair per confidence level, keyed by the level's string form.
    risk = {}
    for level in cfg.alphas:
        var_level, es_level = var_es(pnl, float(level))
        risk[str(level)] = {"VaR": var_level, "ES": es_level}

    # Historical panel: per-asset returns plus the weighted portfolio return.
    ret_columns = [f"ret_{c}" for c in rets.columns]
    out = pd.DataFrame(index=rets.index)
    out[ret_columns] = rets.add_prefix("ret_")
    out["port_ret_hist"] = rets.values @ w

    calibration_section = {
        "nu": int(nu_hat),
        "pseudo_ll": float(fit["pll"]),
        "corr_shrink": float(cfg.corr_shrink),
    }
    portfolio_section = {
        "symbols": list(cfg.symbols),
        "weights": [float(x) for x in w.tolist()],
        "notional": float(cfg.notional),
        "horizon_days": int(cfg.horizon_days),
    }
    window_section = {
        "start": str(rets.index.min().date()),
        "end": str(rets.index.max().date()),
        "n_returns": int(len(rets)),
    }

    summary = {
        "config": asdict(cfg),
        "calibration": calibration_section,
        "portfolio": portfolio_section,
        "risk": risk,
        "data_window": window_section,
    }
    return out, summary


def save_outputs(out: pd.DataFrame, summary: Dict, cfg: Config) -> None:
    """
    Write the panel to cfg.out_csv and the summary to cfg.out_json (UTF-8,
    indented JSON), creating the parent directories if needed, then print the
    saved paths and the VaR/ES figures for every alpha in summary["risk"].
    """
    for target in (cfg.out_csv, cfg.out_json):
        parent = os.path.dirname(target)
        # A bare file name has no parent component; use the current directory.
        os.makedirs(parent if parent else ".", exist_ok=True)

    out.to_csv(cfg.out_csv)
    with open(cfg.out_json, "w", encoding="utf-8") as handle:
        json.dump(summary, handle, indent=2)

    print(f"[OK] Saved daily panel → {cfg.out_csv}")
    print(f"[OK] Saved summary → {cfg.out_json}")
    for level, metrics in summary["risk"].items():
        var_value = metrics["VaR"]
        es_value = metrics["ES"]
        print(f"alpha={level}: VaR=${var_value:.2f}, ES=${es_value:.2f}")


# ----------------------------- CLI -----------------------------
def parse_args(argv: Optional[List[str]] = None) -> Config:
    """
    Build a Config from command-line arguments.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse. With None (the default) argparse reads
        sys.argv[1:], which is the behaviour of a plain `parse_args()` call.

    Returns
    -------
    Config populated from the arguments. `horizon_days` has no command-line
    option and keeps its Config default.

    Every option's default is read from the Config class, so the command line
    and `Config()` cannot drift apart. A confidence level outside [0, 1] is
    rejected here (argparse usage error, exit status 2) instead of failing in
    the quantile computation after the calibration has already run.
    """
    p = argparse.ArgumentParser(description="Level-79: t-Copula Tail Risk (VaR/ES) - SciPy-free")

    p.add_argument("--start", type=str, default=Config.start)
    p.add_argument("--symbols", nargs="+", default=list(Config.symbols))
    p.add_argument("--nu-grid", nargs="+", type=int, default=list(Config.nu_grid))
    p.add_argument("--corr-shrink", type=float, default=Config.corr_shrink)

    p.add_argument("--sims", type=int, default=Config.sims)
    p.add_argument("--seed", type=int, default=Config.seed)

    p.add_argument("--alphas", nargs="+", type=float, default=list(Config.alphas))
    p.add_argument("--weights", nargs="+", type=float, default=Config.weights)
    p.add_argument("--notional", type=float, default=Config.notional)

    p.add_argument("--csv", type=str, default=Config.out_csv)
    p.add_argument("--json", type=str, default=Config.out_json)

    a = p.parse_args(argv)

    # Fail fast on levels the quantile computation cannot accept.
    if any(not (0.0 <= level <= 1.0) for level in a.alphas):
        p.error("--alphas values must lie in [0, 1]")

    return Config(
        symbols=tuple(a.symbols),
        start=a.start,
        nu_grid=tuple(a.nu_grid),
        corr_shrink=float(a.corr_shrink),
        sims=int(a.sims),
        seed=int(a.seed),
        alphas=tuple(a.alphas),
        weights=None if a.weights is None else list(a.weights),
        notional=float(a.notional),
        out_csv=a.csv,
        out_json=a.json,
    )


def main() -> None:
    """Script entry point: parse the command line, run the pipeline, save the results."""
    config = parse_args()
    panel, report = run_pipeline(config)
    save_outputs(panel, report, config)


if __name__ == "__main__":
    # When run inside a Jupyter/PyCharm cell, the kernel launcher adds
    # "-f <...kernel...json>" to the command line. Drop those tokens so
    # argparse only sees this script's own options.
    import sys

    sys.argv = [
        sys.argv[0],
        *[
            token
            for token in sys.argv[1:]
            if not (token == "-f" or (token.endswith(".json") and "kernel" in token))
        ],
    ]
    main()


[INFO] Downloading prices for ('SPY', 'QQQ', 'IWM', 'EFA', 'EEM', 'TLT', 'LQD', 'GLD') from 2010-01-01 ...
[INFO] Got 4013 price rows, 4012 return rows, assets=8
[INFO] Calibrating t-copula nu over grid: (4, 6, 8, 10, 15, 20) ...
