# In [3]:
# level43_orderbook_imbalance.py
# Public L2 order-book imbalance with automatic Binance→Coinbase fallback.
# No API keys. Robust retries, UTC timestamps, CLI + Jupyter-friendly shim.
#
# Usage (terminal):
#   python level43_orderbook_imbalance.py --symbol BTCUSDT --levels 20 --interval 1.0 --minutes 3
# Or inside notebooks, it will ignore Jupyter's -f args automatically.

import os
import time
import json
import argparse
from dataclasses import dataclass, asdict
from typing import Optional, Dict, Tuple, List

import numpy as np
import pandas as pd
import requests
from requests.adapters import HTTPAdapter, Retry


# ----------------------------- Config -----------------------------
@dataclass
class Config:
    """Run-time settings for one order-book imbalance collection run."""

    # Binance-style symbol; the Coinbase fallback maps it to a product id
    # (for example BTCUSDT -> BTC-USD).
    symbol: str = "BTCUSDT"
    # Number of top price levels per side used for the imbalance.
    levels: int = 20
    # Seconds between polls.
    interval: float = 1.0
    # Total collection duration, in minutes.
    minutes: float = 2.0
    # EWMA span used to smooth the OBI series.
    ema: int = 30
    # Output paths for the per-poll table and the metrics summary.
    out_csv: str = "level43_orderbook_imbalance.csv"
    out_json: str = "level43_orderbook_imbalance_metrics.json"
    # Value passed as `timeout=` on every HTTP GET.
    timeout: float = 8.0
    # Value assigned to the session's `verify` attribute.
    verify_ssl: bool = True

    # Public REST base URLs for the primary and fallback exchanges.
    binance_base: str = "https://api.binance.com"
    coinbase_base: str = "https://api.exchange.coinbase.com"

# ----------------------------- HTTP session -----------------------------
def build_session(cfg: Config) -> requests.Session:
    """Create a `requests` session with automatic retries for GET requests.

    Args:
        cfg: Run configuration; only `verify_ssl` is read here.

    Returns:
        A session whose http:// and https:// traffic goes through one shared
        retrying adapter.
    """
    # Retry GETs up to 5 times on the listed status codes; with
    # raise_on_status=False the final response is handed back to the caller
    # instead of raising once retries are exhausted.
    policy = Retry(
        total=5,
        backoff_factor=0.4,
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=("GET",),
        raise_on_status=False,
    )
    shared_adapter = HTTPAdapter(max_retries=policy)

    session = requests.Session()
    for scheme in ("https://", "http://"):
        session.mount(scheme, shared_adapter)
    session.verify = cfg.verify_ssl
    return session

# ----------------------------- Helpers -----------------------------
def now_utc() -> pd.Timestamp:
    """Return the current instant as a timezone-aware UTC pandas Timestamp."""
    current = pd.Timestamp.now(tz="UTC")
    return current

def coinbase_symbol(binance_sym: str) -> str:
    """Map a Binance-style symbol to a Coinbase-style product id.

    This is a crude mapping: symbols that already contain '-' pass through
    unchanged, a trailing USDT or USD quote is replaced by "-USD", and anything
    else simply gets "-USD" appended as a guess.
    """
    if "-" in binance_sym:
        return binance_sym
    # USDT must be tried before USD so the whole quote suffix is stripped.
    for quote in ("USDT", "USD"):
        if binance_sym.endswith(quote):
            base = binance_sym[: -len(quote)]
            return base + "-USD"
    # No recognised quote currency: default guess.
    return binance_sym + "-USD"

# ----------------------------- Fetchers -----------------------------
def fetch_depth_binance(sess: requests.Session, cfg: Config, levels: int) -> Dict:
    """Fetch one depth snapshot from Binance and return it in the shared shape.

    Args:
        sess: HTTP session used for the GET.
        cfg: Run configuration (`binance_base`, `symbol`, `timeout` are read).
        levels: Requested depth; the `limit` query value is clamped to [5, 500].

    Returns:
        Dict with keys "exchange", "bids", "asks" (lists of [price, qty]
        floats), "best_bid" and "best_ask" (NaN when that side is empty).

    Raises:
        requests.HTTPError: if the response status is an error.
    """
    depth_limit = min(max(levels, 5), 500)
    resp = sess.get(
        f"{cfg.binance_base}/api/v3/depth",
        params={"symbol": cfg.symbol, "limit": depth_limit},
        timeout=cfg.timeout,
    )
    resp.raise_for_status()
    payload = resp.json()

    def _parse_side(side: str) -> List[List[float]]:
        # Convert every [price, qty] pair of one side to floats.
        return [[float(price), float(qty)] for price, qty in payload.get(side, [])]

    bid_levels = _parse_side("bids")
    ask_levels = _parse_side("asks")
    # The first entry of each side is reported as the best price.
    return {
        "exchange": "binance",
        "bids": bid_levels,
        "asks": ask_levels,
        "best_bid": bid_levels[0][0] if bid_levels else np.nan,
        "best_ask": ask_levels[0][0] if ask_levels else np.nan,
    }

def fetch_depth_coinbase(sess: requests.Session, cfg: Config, levels: int) -> Dict:
    """Fetch one level-2 book snapshot from Coinbase in the shared shape.

    Args:
        sess: HTTP session used for the GET.
        cfg: Run configuration (`coinbase_base`, `symbol`, `timeout` are read).
        levels: Maximum number of price levels kept per side.

    Returns:
        Dict with keys "exchange", "bids", "asks" (lists of [price, qty]
        floats), "best_bid" and "best_ask" (NaN when that side is empty).

    Raises:
        requests.HTTPError: if the response status is an error.
    """
    product_id = coinbase_symbol(cfg.symbol)
    # level=2 returns aggregated price levels.
    resp = sess.get(
        f"{cfg.coinbase_base}/products/{product_id}/book",
        params={"level": 2},
        timeout=cfg.timeout,
        headers={"User-Agent": "level43-imbalance"},
    )
    resp.raise_for_status()
    book = resp.json()

    # Each raw entry has three fields; only price and size are kept.
    parsed = {
        side: [[float(price), float(size)] for price, size, _count in book.get(side, [])]
        for side in ("bids", "asks")
    }
    top_bids = parsed["bids"][: min(levels, len(parsed["bids"]))]
    top_asks = parsed["asks"][: min(levels, len(parsed["asks"]))]

    result = {"exchange": "coinbase", "bids": top_bids, "asks": top_asks}
    result["best_bid"] = top_bids[0][0] if top_bids else np.nan
    result["best_ask"] = top_asks[0][0] if top_asks else np.nan
    return result

def fetch_depth(sess: requests.Session, cfg: Config, levels: int) -> Dict:
    """Return a depth snapshot from Binance, falling back to Coinbase.

    The fallback is used when Binance answers with 451/403 or with one of
    429/500/502/503/504, and also for any non-HTTP exception raised by the
    Binance fetcher. An HTTP error with any other status code is re-raised.
    """
    blocked_statuses = (451, 403)
    transient_statuses = (429, 500, 502, 503, 504)
    try:
        snapshot = fetch_depth_binance(sess, cfg, levels)
    except requests.HTTPError as err:
        # `err.response` can be None, so compare against None explicitly.
        response = err.response
        status = None if response is None else response.status_code
        if status not in blocked_statuses + transient_statuses:
            raise
        return fetch_depth_coinbase(sess, cfg, levels)
    except Exception:
        # Generic fallback for anything that is not an HTTP status error.
        return fetch_depth_coinbase(sess, cfg, levels)
    else:
        return snapshot

# ----------------------------- Math -----------------------------
def topL_vol_and_pw(bids: List[List[float]], asks: List[List[float]], L: int) -> Tuple[float, float, float, float, float]:
    """Aggregate the first L levels of each side of a book.

    Args:
        bids: [price, qty] pairs; the first entry is taken as the best bid.
        asks: [price, qty] pairs; the first entry is taken as the best ask.
        L: Number of leading levels to include per side.

    Returns:
        (bid_vol, ask_vol, bid_pw, ask_pw, mid) where *_vol is the summed
        quantity, *_pw the summed price*quantity, and mid the average of the
        two best prices (NaN if either side is empty or non-finite).
    """
    def _side_totals(side_levels: List[List[float]]) -> Tuple[float, float, float]:
        # Quantity sum, price-weighted sum and best price for one side.
        volume = sum(level[1] for level in side_levels)
        weighted = sum(level[0] * level[1] for level in side_levels)
        top_price = side_levels[0][0] if side_levels else np.nan
        return volume, weighted, top_price

    bid_vol, bid_pw, best_bid = _side_totals(bids[:L])
    ask_vol, ask_pw, best_ask = _side_totals(asks[:L])

    if np.isfinite(best_bid) and np.isfinite(best_ask):
        mid = (best_bid + best_ask) / 2.0
    else:
        mid = np.nan
    return bid_vol, ask_vol, bid_pw, ask_pw, mid

def safe_ratio(num: float, den: float) -> float:
    """Return num/den, or NaN unless the denominator is finite and positive."""
    denominator_ok = np.isfinite(den) and den > 0
    if denominator_ok:
        return num / den
    return np.nan

# ----------------------------- Collector loop -----------------------------
# Numeric columns recorded for every poll, in output order.
_ROW_VALUE_COLUMNS = (
    "best_bid", "best_ask", "mid", "bid_vol_L", "ask_vol_L", "obi", "obi_pw",
)


def _finite_or_nan(value: float) -> float:
    """Return *value* as a float, mapping inf/NaN to NaN."""
    return float(value) if np.isfinite(value) else np.nan


def _imbalance_row(depth: Dict, levels: int) -> Dict:
    """Turn one standardized depth snapshot into a row of imbalance metrics."""
    bids, asks = depth["bids"], depth["asks"]
    if not bids or not asks:
        raise ValueError("Empty depth arrays")

    bid_vol, ask_vol, bid_pw, ask_pw, mid = topL_vol_and_pw(bids, asks, levels)
    obi = safe_ratio(bid_vol - ask_vol, bid_vol + ask_vol)    # volume OBI
    obi_pw = safe_ratio(bid_pw - ask_pw, bid_pw + ask_pw)     # price-weighted OBI
    return {
        "exchange": depth["exchange"],
        "best_bid": float(depth["best_bid"]),
        "best_ask": float(depth["best_ask"]),
        "mid": _finite_or_nan(mid),
        "bid_vol_L": float(bid_vol),
        "ask_vol_L": float(ask_vol),
        "obi": _finite_or_nan(obi),
        "obi_pw": _finite_or_nan(obi_pw),
    }


def _error_row() -> Dict:
    """Build the all-NaN placeholder row recorded when a poll fails."""
    return {"exchange": "error", **dict.fromkeys(_ROW_VALUE_COLUMNS, np.nan)}


def collect_orderbook(cfg: Config) -> pd.DataFrame:
    """Poll the order book for `cfg.minutes` and return one row per poll.

    Each iteration fetches a snapshot, computes the volume and price-weighted
    imbalance over the top `cfg.levels` levels, and timestamps the row in UTC.
    A failed poll is recorded as a row with exchange "error" and NaN values so
    the sampling cadence is preserved.

    Args:
        cfg: Run configuration (`levels`, `interval`, `minutes` plus whatever
            the session builder and fetchers read).

    Returns:
        DataFrame indexed by "timestamp" (sorted) with columns "exchange",
        "best_bid", "best_ask", "mid", "bid_vol_L", "ask_vol_L", "obi",
        "obi_pw". If no poll ran (non-positive duration) the frame is empty
        but still has these columns and a UTC datetime index.
    """
    rows: List[Dict] = []
    # Monotonic clock: durations must not be affected by wall-clock changes.
    deadline = time.monotonic() + 60.0 * cfg.minutes

    # The context manager closes the session (and its pooled connections)
    # even if the loop is interrupted.
    with build_session(cfg) as sess:
        while time.monotonic() < deadline:
            started = time.monotonic()
            try:
                values = _imbalance_row(fetch_depth(sess, cfg, cfg.levels), cfg.levels)
            except Exception:
                # Deliberate best-effort: keep the cadence with a NaN row.
                values = _error_row()
            rows.append({"timestamp": now_utc(), **values})

            # Cadence sleep, capped so the last pause never overshoots the
            # end of the run.
            now = time.monotonic()
            pause = min(cfg.interval - (now - started), deadline - now)
            if pause > 0:
                time.sleep(pause)

    frame = pd.DataFrame(rows, columns=["timestamp", "exchange", *_ROW_VALUE_COLUMNS])
    if frame.empty:
        # With no rows every column is object-typed; give downstream numeric
        # code the dtypes it would see on a normal run.
        frame = frame.astype({name: float for name in _ROW_VALUE_COLUMNS})
        frame["timestamp"] = pd.to_datetime(frame["timestamp"], utc=True)
    return frame.set_index("timestamp").sort_index()

# ----------------------------- Features & Metrics -----------------------------
def add_features(df: pd.DataFrame, cfg: Config) -> pd.DataFrame:
    """Return a copy of *df* with smoothed, return and z-score features added.

    Added columns:
        obi_ema, obi_pw_ema: EWMA (span `cfg.ema`, at least 5 observations)
            of "obi" and "obi_pw".
        mid_ret: log return of "mid" from the previous row.
        mid_ret_fwd: log return of "mid" to the next row (NaN on the last row).
        obi_z: rolling z-score of "obi".
        obi_extreme_flag: 1 where |obi| is at or above its 90th percentile.

    Args:
        df: Frame with at least "obi", "obi_pw" and "mid" columns.
        cfg: Run configuration (`ema` and `interval` are read).
    """
    out = df.copy()
    ewm_kwargs = {"span": cfg.ema, "adjust": False, "min_periods": 5}
    out["obi_ema"] = out["obi"].ewm(**ewm_kwargs).mean()
    out["obi_pw_ema"] = out["obi_pw"].ewm(**ewm_kwargs).mean()

    log_mid = np.log(out["mid"])
    out["mid_ret"] = log_mid.diff()
    out["mid_ret_fwd"] = log_mid.shift(-1) - log_mid

    # Rolling window: roughly one minute of samples, never fewer than 60.
    # A zero interval (poll as fast as possible) would divide by zero, so it
    # falls back to the 60-sample floor.
    per_minute = int(60 / cfg.interval) if cfg.interval > 0 else 0
    roll = max(60, per_minute)
    window = out["obi"].rolling(roll, min_periods=roll // 3)
    # The small epsilon keeps the division defined when the rolling std is 0.
    out["obi_z"] = (out["obi"] - window.mean()) / (window.std() + 1e-12)

    # NOTE: the threshold is the quantile of the whole sample, so the flag is
    # descriptive (it uses information from later rows).
    thr = out["obi"].abs().quantile(0.90)
    out["obi_extreme_flag"] = (out["obi"].abs() >= thr).astype(int)
    return out

def summarize(df: pd.DataFrame, cfg: Config) -> Dict:
    """Compute summary metrics for a collected (and featurized) frame.

    Every column-dependent metric is NaN when its column is missing or when
    "obi" has no valid observation, so the function never raises on a frame
    that lacks "obi", "obi_pw", "mid_ret" or "mid_ret_fwd".

    Args:
        df: Frame produced by the collector, optionally with feature columns.
        cfg: Run configuration echoed into the result (`levels`, `interval`,
            `minutes`, `ema`).

    Returns:
        Dict with the sample count, echoed settings, mean OBI values, the 90th
        percentile of |obi|, and the correlation of "obi" with "mid_ret" and
        with "mid_ret_fwd".
    """
    missing = pd.Series(dtype=float)
    obi = df["obi"] if "obi" in df else missing
    obi_pw = df["obi_pw"] if "obi_pw" in df else missing
    has_obi = bool(obi.notna().any())

    def _corr_with(column: str) -> float:
        # Correlation of obi with another column; NaN if it cannot be formed.
        if not has_obi or column not in df:
            return np.nan
        return float(obi.corr(df[column]))

    return {
        "samples": int(df.shape[0]),
        "levels": cfg.levels,
        "interval_sec": cfg.interval,
        "duration_min": cfg.minutes,
        "ema_span": cfg.ema,
        "obi_mean": float(obi.dropna().mean()),
        "obi_pw_mean": float(obi_pw.dropna().mean()),
        "obi_abs_p90": float(obi.abs().quantile(0.90)) if has_obi else np.nan,
        "corr_obi_midret": _corr_with("mid_ret"),
        "corr_obi_lead_midret": _corr_with("mid_ret_fwd"),
    }

# ----------------------------- I/O -----------------------------
def _json_safe(value):
    """Recursively replace non-finite floats with None (JSON null)."""
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


def save_outputs(df: pd.DataFrame, metrics: Dict, cfg: Config):
    """Write the frame to CSV and the config + metrics to JSON, then report.

    NaN/inf metric values are written as JSON `null`: the default encoder
    would emit bare `NaN`/`Infinity` tokens, which strict JSON parsers reject.
    The console summary still prints the original metric values.

    Args:
        df: Frame to write to `cfg.out_csv` (index included).
        metrics: Summary metrics to store under the "metrics" key.
        cfg: Run configuration; stored under the "config" key and used for
            the two output paths.
    """
    # Create the parent directory of each output; a bare filename maps to ".".
    for target in (cfg.out_csv, cfg.out_json):
        os.makedirs(os.path.dirname(target) or ".", exist_ok=True)

    df.to_csv(cfg.out_csv, index=True, date_format="%Y-%m-%dT%H:%M:%S.%fZ")

    payload = _json_safe({"config": asdict(cfg), "metrics": metrics})
    with open(cfg.out_json, "w", encoding="utf-8") as f:
        # allow_nan=False guarantees nothing non-finite slips through.
        json.dump(payload, f, indent=2, allow_nan=False)

    print(f"[OK] Saved CSV  → {cfg.out_csv}")
    print(f"[OK] Saved JSON → {cfg.out_json}")
    print("Metrics:", {k: (round(v, 6) if isinstance(v, float) else v) for k, v in metrics.items()})

# ----------------------------- CLI -----------------------------
def parse_args() -> Config:
    """Parse command-line options into a `Config`.

    Option defaults are taken from `Config` so they are defined in one place.
    Values that would only fail after the collection has already run are
    rejected up front with an argparse usage error.

    Returns:
        The populated configuration.

    Raises:
        SystemExit: via argparse, on unknown options or invalid values.
    """
    defaults = Config()
    p = argparse.ArgumentParser(description="Level-43: Order-Book Imbalance (Binance→Coinbase fallback)")
    p.add_argument("--symbol", type=str, default=defaults.symbol)
    p.add_argument("--levels", type=int, default=defaults.levels)
    p.add_argument("--interval", type=float, default=defaults.interval)
    p.add_argument("--minutes", type=float, default=defaults.minutes)
    p.add_argument("--ema", type=int, default=defaults.ema)
    p.add_argument("--csv", type=str, default=defaults.out_csv)
    p.add_argument("--json", type=str, default=defaults.out_json)
    p.add_argument("--timeout", type=float, default=defaults.timeout)
    p.add_argument("--no-verify-ssl", action="store_true")
    a = p.parse_args()

    # Fail fast. The comparisons are written as `not (x > 0)` so that NaN is
    # rejected as well.
    if a.levels < 1:
        p.error("--levels must be >= 1")
    if a.ema < 1:
        p.error("--ema must be >= 1")
    if not (a.minutes > 0):
        p.error("--minutes must be > 0")
    if not (a.timeout > 0):
        p.error("--timeout must be > 0")

    return Config(
        symbol=a.symbol, levels=a.levels, interval=a.interval, minutes=a.minutes, ema=a.ema,
        out_csv=a.csv, out_json=a.json, timeout=a.timeout, verify_ssl=not a.no_verify_ssl
    )

# ----------------------------- Main -----------------------------
def main():
    """Run the pipeline: parse options, collect, add features, summarize, save."""
    cfg = parse_args()
    frame = add_features(collect_orderbook(cfg), cfg)
    save_outputs(frame, summarize(frame, cfg), cfg)

if __name__ == "__main__":
    # Jupyter shim: the kernel is launched with "-f <connection-file>", which
    # argparse would reject. Drop exactly that flag and the value following
    # it; every other argument is kept, including a user-supplied
    # "--json out.json" whose value also ends in ".json".
    import sys

    raw_args = iter(sys.argv[1:])
    kept_args = []
    for arg in raw_args:
        if arg == "-f":
            next(raw_args, None)  # discard the connection-file path
            continue
        kept_args.append(arg)
    sys.argv = [sys.argv[0], *kept_args]
    main()


# Example output (recorded notebook run):
# [OK] Saved CSV  → level43_orderbook_imbalance.csv
# [OK] Saved JSON → level43_orderbook_imbalance_metrics.json
# Metrics: {'samples': 120, 'levels': 20, 'interval_sec': 1.0, 'duration_min': 2.0, 'ema_span': 30, 'obi_mean': -0.118004, 'obi_pw_mean': -0.118076, 'obi_abs_p90': 0.282508, 'corr_obi_midret': -0.121264, 'corr_obi_lead_midret': -0.018338}
