# Notebook cell In [5]:
# level44_execution_sim.py
# Free-data Execution Simulator: TWAP/VWAP with participation caps & impact model.
# Data: yfinance intraday (1m ≤ 7d; else 5m). Outputs CSV + JSON.
#
# Usage (examples):
#   python level44_execution_sim.py --symbol SPY --side buy --qty 50000 --minutes 120 --strategy twap
#   python level44_execution_sim.py --symbol AAPL --side sell --qty 25000 --minutes 90 --strategy vwap --participation 0.12
#   python level44_execution_sim.py --symbol BTC-USD --side buy --qty 5 --minutes 60 --interval 1m --spread-bps 1.5

import os
import json
import argparse
from dataclasses import dataclass, asdict

import numpy as np
import pandas as pd
import yfinance as yf


# ----------------------------- Config -----------------------------
@dataclass
class Config:
    """Run parameters for one simulated parent order.

    Instances are built from the command line by ``parse_args`` and are
    written verbatim (via ``asdict``) into the JSON summary by ``save_outputs``.
    """
    # --- order description ---
    symbol: str = "SPY"               # ticker passed to yf.download
    side: str = "buy"                 # buy or sell
    qty: float = 50000                # parent quantity (shares/units)
    minutes: int = 120                # total schedule horizon (minutes)
    strategy: str = "twap"            # twap or vwap
    # --- market data ---
    interval: str = "1m"              # 1m or 5m (load_intraday uses 5m when 1m is requested with lookback_days > 7)
    lookback_days: int = 5            # how many recent days to pull
    start_offset_min: int = 0         # start after N mins from first bar
    # --- execution constraints and cost model ---
    participation: float = 0.10       # max participation per bar (0..1)
    spread_bps: float = 2.0           # quoted spread (round-trip bps of mid); half-spread per fill
    k_temp: float = 0.30              # temp impact coefficient
    alpha: float = 0.60               # temp impact elasticity on (child/bar_vol)
    k_perm: float = 0.02              # permanent impact coefficient vs cum_exec/ADV
    vol_span: int = 20                # EWMA span for volatility estimate
    vol_floor_bps: float = 5.0        # minimum σ per bar in bps to avoid zero impact
    # --- outputs ---
    out_csv: str = "level44_exec_slices.csv"      # per-bar slice table
    out_json: str = "level44_exec_summary.json"   # config + headline metrics
    seed: int = 42                    # for any stochastic extensions (kept deterministic here)


# ----------------------------- Small helper -----------------------------
def to_float(x) -> float:
    """Collapse a scalar, Series, or ndarray to a single Python float.

    For a Series the first element (by position) is used; for an ndarray the
    first element of the flattened array is used. An empty Series/ndarray
    yields NaN. Anything else is handed straight to ``float``.
    """
    nan = float("nan")

    if isinstance(x, pd.Series):
        return float(x.iloc[0]) if x.size != 0 else nan

    if isinstance(x, np.ndarray):
        flat = x.reshape(-1)
        return float(flat[0]) if flat.size != 0 else nan

    return float(x)


# ----------------------------- Data Loader -----------------------------
def load_intraday(cfg: Config) -> pd.DataFrame:
    """Download recent intraday OHLCV bars for ``cfg.symbol`` via yfinance.

    Args:
        cfg: Run configuration; ``symbol``, ``interval`` and ``lookback_days``
            are read.

    Returns:
        DataFrame with flat columns ``Open, High, Low, Close, Volume`` (rows
        containing NaN dropped) and a timezone-naive DatetimeIndex expressed
        in UTC when the source index was timezone-aware.

    Raises:
        RuntimeError: if no data comes back or a required column is missing.
    """
    # Enforce 1m only when lookback ≤ 7 days (yfinance limit). Else switch to 5m.
    interval = cfg.interval
    if interval == "1m" and cfg.lookback_days > 7:
        interval = "5m"

    df = yf.download(
        cfg.symbol,
        period=f"{cfg.lookback_days}d",
        interval=interval,
        auto_adjust=True,
        progress=False,
    )
    if df is None or df.empty:
        raise RuntimeError("No intraday data returned (symbol/period/interval mismatch?).")

    # Some yfinance versions return two-level columns (price field, ticker) even
    # for a single symbol. Keep only the price-field level so that df["Close"]
    # is a Series and the ticker is not mangled by the title-casing below.
    if isinstance(df.columns, pd.MultiIndex):
        field_level = 0
        for lvl in range(df.columns.nlevels):
            labels = {str(v).title() for v in df.columns.get_level_values(lvl)}
            if "Close" in labels:
                field_level = lvl
                break
        df = df.copy()
        df.columns = df.columns.get_level_values(field_level)

    # Normalize index to tz-naive UTC. Only a tz-aware index needs converting;
    # checking explicitly avoids swallowing unrelated errors.
    if isinstance(df.index, pd.DatetimeIndex) and df.index.tz is not None:
        df = df.tz_convert("UTC").tz_localize(None)

    df = df.rename(columns=lambda c: str(c).title())
    need = ["Open", "High", "Low", "Close", "Volume"]
    missing = [c for c in need if c not in df.columns]
    if missing:
        raise RuntimeError(f"Missing column in data: {missing[0]}")
    return df[need].dropna()


# ----------------------------- Volume Curve (VWAP) -----------------------------
def volume_curve_weights(bars: pd.DataFrame) -> pd.Series:
    """Estimate per-bar volume weights from a median minute-of-day curve.

    Each bar is assigned the median ``Volume`` observed at its minute-of-day
    across all of ``bars``; the result is normalized to sum to 1.

    Args:
        bars: Frame with a ``Volume`` column and a DatetimeIndex.

    Returns:
        Series of weights aligned to ``bars.index``. If the curve carries no
        positive volume at all (all zero or all NaN), equal weights are
        returned instead of NaN.

    Raises:
        ValueError: if ``Volume`` is missing or the index is not a DatetimeIndex.
    """
    if "Volume" not in bars:
        raise ValueError("bars need Volume")
    stamps = bars.index
    if not isinstance(stamps, pd.DatetimeIndex):
        raise ValueError("bars index must be DatetimeIndex")

    volume = bars["Volume"]
    # Multi-level column frames yield a one-column DataFrame here; use that column.
    if isinstance(volume, pd.DataFrame):
        volume = volume.iloc[:, 0]

    minute_of_day = np.asarray(stamps.hour * 60 + stamps.minute)
    curve = volume.groupby(minute_of_day).median()

    per_bar = pd.Series(
        curve.reindex(minute_of_day).to_numpy(dtype=float),
        index=stamps,
        name="Volume",
    )
    per_bar = per_bar.fillna(per_bar.median())

    total = per_bar.sum()
    if not total > 0:
        # No usable volume information: dividing would give NaN weights, so
        # spread the order evenly instead.
        if len(per_bar) == 0:
            return per_bar
        return pd.Series(1.0 / len(per_bar), index=stamps, name="Volume")
    return per_bar / total


# ----------------------------- Volatility per bar -----------------------------
def bar_volatility_ewma(bars: pd.DataFrame, span: int, floor_bps: float) -> pd.Series:
    """Per-bar volatility estimate in basis points.

    Takes log returns of the High/Low midpoint, runs an exponentially weighted
    standard deviation (``adjust=False``) with the given ``span``, scales to
    bps and floors the result at ``floor_bps``. Undefined leading values are
    treated as zero before the floor is applied.
    """
    midpoint = bars["High"].add(bars["Low"]).div(2.0)
    log_returns = np.log(midpoint).diff().fillna(0.0)

    ewm_std = log_returns.ewm(span=span, adjust=False).std()
    ewm_std = ewm_std.fillna(0.0)

    return ewm_std.mul(1e4).clip(lower=floor_bps)


# ----------------------------- Schedule Builder -----------------------------
def build_schedule(cfg: Config, bars: pd.DataFrame) -> pd.DataFrame:
    """Build per-bar target quantities for TWAP/VWAP over ``cfg.minutes``.

    Args:
        cfg: Run configuration; reads ``minutes``, ``start_offset_min``,
            ``strategy``, ``qty``, ``spread_bps``, ``vol_span`` and
            ``vol_floor_bps``.
        bars: Intraday bars with ``High``, ``Low``, ``Volume`` columns and a
            DatetimeIndex.

    Returns:
        A copy of the selected bars with added columns ``target_qty``,
        ``Mid`` (High/Low midpoint), ``HalfSpread`` (price units) and
        ``Sigma_bps``. The window may be shorter than requested when the data
        runs out.

    Raises:
        ValueError: on a negative start offset or an unknown strategy.
        RuntimeError: if the requested window contains no bars.
    """
    if cfg.start_offset_min < 0:
        # A negative offset would silently slice from the END of the data.
        raise ValueError("start_offset_min must be >= 0")
    if len(bars) == 0:
        raise RuntimeError("Chosen minutes/start_offset exceed available intraday window.")

    # Bar length in minutes from the typical spacing of the index. The median
    # ignores overnight gaps and also works when only one bar is available.
    gaps = bars.index.to_series().diff().dropna()
    if len(gaps):
        bar_minutes = int(round(gaps.median().total_seconds() / 60.0)) or 1
    else:
        bar_minutes = 1

    n_slices = max(1, cfg.minutes // bar_minutes)
    # The offset is given in minutes; convert it to a bar count so that it
    # means the same thing on 1m and 5m data.
    start_idx = cfg.start_offset_min // bar_minutes
    stop_idx = start_idx + n_slices

    window = bars.iloc[start_idx:stop_idx].copy()
    if window.empty:
        raise RuntimeError("Chosen minutes/start_offset exceed available intraday window.")

    strat = cfg.strategy.lower()
    if strat == "twap":
        w = pd.Series(1.0 / len(window), index=window.index)
    elif strat == "vwap":
        # Estimate the curve from ALL loaded bars, then renormalize over the
        # window. Estimating from the window alone sees each minute-of-day
        # once, so the "curve" would just be the realised volumes.
        w = volume_curve_weights(bars).iloc[start_idx:stop_idx]
        total = to_float(w.sum())
        if total > 0:
            w = w / total
        else:
            w = pd.Series(1.0 / len(window), index=window.index)
    else:
        raise ValueError("strategy must be 'twap' or 'vwap'")

    window["target_qty"] = cfg.qty * w

    window["Mid"] = (window["High"] + window["Low"]) / 2.0
    # spread_bps is the full quoted spread; each fill pays half of it.
    window["HalfSpread"] = window["Mid"] * (cfg.spread_bps * 0.0001) / 2.0
    window["Sigma_bps"] = bar_volatility_ewma(window, cfg.vol_span, cfg.vol_floor_bps)
    return window


# ----------------------------- ADV estimate -----------------------------
def estimate_ADV(bars: pd.DataFrame, n_days: int = 20) -> float:
    """Estimate average daily volume from intraday bars.

    Bar volume is summed per calendar day and averaged over the last
    ``n_days`` days that actually traded. Calendar days with zero volume
    (the weekend/holiday rows that ``resample("1D")`` inserts) are excluded
    so they do not drag the average down.

    Args:
        bars: Frame with a ``Volume`` column and a DatetimeIndex.
        n_days: Maximum number of most recent traded days to average.

    Returns:
        The ADV estimate, never below 1.0 and never NaN.
    """
    volume = bars["Volume"]
    # Multi-level column frames yield a one-column DataFrame here; use that column.
    if isinstance(volume, pd.DataFrame):
        volume = volume.iloc[:, 0]

    daily = volume.resample("1D").sum()
    traded = daily[daily > 0]

    if len(traded):
        adv = float(traded.tail(n_days).mean())
    else:
        adv = float(volume.mean() * 390.0)  # rough fallback

    # Written as a comparison rather than max(): max(nan, 1.0) returns nan.
    return adv if adv > 1.0 else 1.0


# ----------------------------- Execution Engine -----------------------------
def simulate_exec(cfg: Config, window: pd.DataFrame, adv_est: float) -> pd.DataFrame:
    """Walk the schedule bar by bar and price every child order.

    Each bar's child is the smallest of the bar target, the participation cap
    (``cfg.participation * bar volume``) and the quantity still outstanding.
    Quantity cut by the cap is not carried over to later bars.

    Impact model:
       temp = k_temp * (child / max(1, bar_vol))^alpha * (sigma_bps/1e4) * Mid
       perm = k_perm * (cum_exec / ADV) * Mid     (cum_exec BEFORE this child)
       fill = Mid ± (HalfSpread + temp + perm)    (+ for buys, - for sells)

    Returns:
        One row per bar, indexed by bar time. Headline metrics are stored in
        ``.attrs``: ``arrival_mid``, ``exec_avg``, ``shortfall_bps``,
        ``completed_qty``, ``completion_pct``.

    Raises:
        ValueError: if ``cfg.side`` is neither 'buy' nor 'sell'.
    """
    side = cfg.side.lower()
    if side not in ("buy", "sell"):
        raise ValueError("side must be 'buy' or 'sell'")
    direction = 1.0 if side == "buy" else -1.0

    # Benchmark for implementation shortfall: midpoint of the first bar.
    arrival_mid = to_float(window["Mid"].iloc[0])
    adv_denominator = max(1.0, adv_est)

    outstanding = cfg.qty
    filled_so_far = 0.0
    records = []

    for stamp, bar in window.iterrows():
        volume = to_float(bar["Volume"])
        wanted = to_float(bar["target_qty"])
        mid_px = to_float(bar["Mid"])
        half = to_float(bar["HalfSpread"])
        sigma = to_float(bar["Sigma_bps"])

        # Defaults describe a bar in which nothing trades.
        child_qty = 0.0
        temp_cost = 0.0
        perm_cost = 0.0
        price = np.nan

        executes = not (outstanding <= 0)
        if executes:
            sized = min(wanted, cfg.participation * volume, outstanding)
            executes = not (sized <= 0 or volume <= 0)

        if executes:
            child_qty = sized
            share_of_bar = min(1.0, child_qty / max(1.0, volume))
            temp_cost = cfg.k_temp * (share_of_bar ** cfg.alpha) * (sigma * 1e-4) * mid_px
            perm_cost = cfg.k_perm * (filled_so_far / adv_denominator) * mid_px
            price = mid_px + direction * (half + temp_cost + perm_cost)

            filled_so_far += child_qty
            outstanding -= child_qty

        records.append({
            "time": stamp,
            "bar_vol": volume,
            "target": wanted,
            "child": child_qty,
            "mid": mid_px,
            "half_spread": half,
            "sigma_bps": sigma,
            "temp_impact": temp_cost,
            "perm_impact": perm_cost,
            "fill_px": price,
            "cum_exec": filled_so_far,
            "remaining": outstanding,
        })

    out = pd.DataFrame(records).set_index("time")

    # Quantity-weighted average fill; with no fills the benchmark is reported.
    traded = out["child"] > 0
    if traded.any():
        exec_avg = float(
            np.average(
                out.loc[traded, "fill_px"].to_numpy(),
                weights=out.loc[traded, "child"].to_numpy(),
            )
        )
    else:
        exec_avg = arrival_mid

    # Positive shortfall always means a worse price than arrival.
    slippage = (exec_avg - arrival_mid) if side == "buy" else (arrival_mid - exec_avg)
    shortfall_bps = slippage / arrival_mid * 1e4

    completed_qty = float(out["child"].sum())
    completion_pct = 100.0 * completed_qty / cfg.qty if cfg.qty > 0 else 0.0

    out.attrs["arrival_mid"] = float(arrival_mid)
    out.attrs["exec_avg"] = float(exec_avg)
    out.attrs["shortfall_bps"] = float(shortfall_bps)
    out.attrs["completed_qty"] = completed_qty
    out.attrs["completion_pct"] = completion_pct

    return out


# ----------------------------- I/O -----------------------------
def save_outputs(slices: pd.DataFrame, cfg: Config):
    """Persist the slice table and a JSON summary, then print a short recap.

    The CSV at ``cfg.out_csv`` holds every row of ``slices``. The JSON at
    ``cfg.out_json`` holds the full config plus the metrics stored in
    ``slices.attrs``. Parent directories are created when missing.
    """
    for destination in (cfg.out_csv, cfg.out_json):
        # dirname is "" for a bare filename; fall back to the current directory.
        os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)

    slices.to_csv(cfg.out_csv, index=True, date_format="%Y-%m-%d %H:%M:%S")

    summary = {"config": asdict(cfg)}
    for metric in ("arrival_mid", "exec_avg", "shortfall_bps", "completed_qty", "completion_pct"):
        summary[metric] = slices.attrs.get(metric)
    summary["slices"] = int(slices.shape[0])

    with open(cfg.out_json, "w") as handle:
        json.dump(summary, handle, indent=2)

    price_line = (
        f"Arrival mid: {summary['arrival_mid']:.6f} | "
        f"Exec avg: {summary['exec_avg']:.6f} | "
        f"Shortfall: {summary['shortfall_bps']:.2f} bps"
    )
    completion_line = (
        f"Completed: {summary['completed_qty']:.2f} "
        f"({summary['completion_pct']:.1f}%) across {summary['slices']} slices"
    )

    print(f"[OK] Saved slices → {cfg.out_csv}")
    print(f"[OK] Saved summary → {cfg.out_json}")
    print(price_line)
    print(completion_line)


# ----------------------------- CLI -----------------------------
def parse_args() -> Config:
    """Parse the command line into a ``Config``.

    Every option maps one-to-one onto a ``Config`` field; ``--csv`` and
    ``--json`` fill ``out_csv`` and ``out_json`` respectively.
    """
    parser = argparse.ArgumentParser(description="Level-44: Execution Simulator (TWAP/VWAP)")

    # (flag, add_argument keyword arguments), in help-output order.
    option_table = (
        ("--symbol", dict(type=str, default="SPY")),
        ("--side", dict(type=str, default="buy", choices=["buy", "sell"])),
        ("--qty", dict(type=float, default=50000)),
        ("--minutes", dict(type=int, default=120)),
        ("--strategy", dict(type=str, default="twap", choices=["twap", "vwap"])),
        ("--interval", dict(type=str, default="1m", choices=["1m", "5m"])),
        ("--lookback-days", dict(type=int, default=5)),
        ("--start-offset-min", dict(type=int, default=0)),
        ("--participation", dict(type=float, default=0.10)),
        ("--spread-bps", dict(type=float, default=2.0)),
        ("--k-temp", dict(type=float, default=0.30)),
        ("--alpha", dict(type=float, default=0.60)),
        ("--k-perm", dict(type=float, default=0.02)),
        ("--vol-span", dict(type=int, default=20)),
        ("--vol-floor-bps", dict(type=float, default=5.0)),
        ("--csv", dict(type=str, default="level44_exec_slices.csv")),
        ("--json", dict(type=str, default="level44_exec_summary.json")),
        ("--seed", dict(type=int, default=42)),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    values = dict(vars(parser.parse_args()))
    # The two output options are named differently from their Config fields.
    values["out_csv"] = values.pop("csv")
    values["out_json"] = values.pop("json")
    return Config(**values)


# ----------------------------- Main -----------------------------
def main():
    """Run one simulation end to end: parse, load, schedule, execute, save."""
    config = parse_args()
    # Nothing below draws random numbers today; seeding keeps future
    # stochastic extensions reproducible.
    np.random.seed(config.seed)

    history = load_intraday(config)
    adv_estimate = estimate_ADV(history, n_days=20)

    schedule = build_schedule(config, history)
    fills = simulate_exec(config, schedule, adv_est=adv_estimate)
    save_outputs(fills, config)


if __name__ == "__main__":
    # Jupyter/PyCharm cell shim: the host passes its own arguments
    # ("-f <kernel connection file>.json"), which argparse would reject.
    # Drop them before main() parses the command line.
    import sys

    cleaned_argv = [sys.argv[0]]
    for token in sys.argv[1:]:
        is_kernel_file = token.endswith(".json") and "kernel" in token
        if token == "-f" or is_kernel_file:
            continue
        cleaned_argv.append(token)
    sys.argv = cleaned_argv

    main()


# Sample output captured from the notebook run:
# [OK] Saved slices → level44_exec_slices.csv
# [OK] Saved summary → level44_exec_summary.json
# Arrival mid: 676.484985 | Exec avg: 673.401266 | Shortfall: -45.58 bps
# Completed: 50000.00 (100.0%) across 120 slices
