In [1]:
!pip install -U pip



In [2]:
!pip install pandas numpy scikit-learn xgboost lightgbm



In [3]:
!pip install torch --index-url https://download.pytorch.org/whl/cpu

Looking in indexes: https://download.pytorch.org/whl/cpu


In [4]:
!pip install shap web3 requests tenacity sentence-transformers duckdb



In [5]:
!pip install matplotlib seaborn plotly weasyprint fpdf2 pyarrow



In [6]:
!pip install faiss-cpu



In [7]:
from __future__ import annotations
import os, sys, json, math, time, hashlib, unicodedata, warnings, random, re
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, List, Tuple, Sequence, Dict
from datetime import datetime, timezone
import numpy as np
import pandas as pd

In [8]:
warnings.filterwarnings("ignore", category=UserWarning)
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

In [9]:
try:
    import xgboost as xgb
    _XGB_OK = True
except Exception as e:
    _XGB_OK = False
    print(f"[warn] xgboost unavailable: {e}")

In [10]:
try:
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset
    _TORCH_OK = True
    torch.manual_seed(SEED)
except Exception as e:
    _TORCH_OK = False
    print(f"[note] torch unavailable: {e}")

In [11]:
try:
    from web3 import Web3, __version__ as _w3v
    _WEB3_OK = True
except Exception as e:
    _WEB3_OK = False
    print(f"[note] web3 unavailable: {e}")

In [12]:
try:
    from fpdf import FPDF, HTMLMixin
    _FPDF_OK = True
except Exception as e:
    _FPDF_OK = False
    print(f"[warn] fpdf2 unavailable: {e}")

In [13]:
try:
    from sklearn.ensemble import IsolationForest
    from sklearn.neighbors import LocalOutlierFactor
    from sklearn.svm import OneClassSVM
    from sklearn.preprocessing import StandardScaler
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.metrics import average_precision_score, brier_score_loss
    from sklearn.inspection import permutation_importance
    from joblib import dump, load
    _SK_OK = True
except Exception as e:
    _SK_OK = False
    print(f"[warn] scikit-learn unavailable: {e}")

In [14]:
OUTPUT_BASE = Path(r"C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel")
OUTPUT_BASE.mkdir(parents=True, exist_ok=True)
OUT = OUTPUT_BASE
OUT_MODEL = OUT / "model"
OUT_ARCHIVE = OUT / "archive"
OUT_MODEL.mkdir(exist_ok=True, parents=True)
OUT_ARCHIVE.mkdir(exist_ok=True, parents=True)

In [15]:
LIVE_CSV = OUT / "live_dataset.csv"
FORECAST_10M_PATH = OUT_MODEL / "forecast_10m_xgb.joblib"
CALIB_10M_PATH    = OUT_MODEL / "forecast_10m_calib.joblib"
FORECAST_10M_PARQUET = OUT / "forecast_10m.parquet"
EXPLAIN_JSON = OUT / "explain.json"
EVENTS_JSON  = OUT / "events.json"
STALE_SEC = 20 * 60

In [16]:
class Config:
    eth_rpc: str = os.getenv("ETH_RPC", "https://cloudflare-eth.com")
    chain_id: int = 1
    mock_mode: bool = True  
    lookback: int = 20
    outdir: str = str(OUT)
    chainlink_feeds = {
        "USDC/USD": "0x8fFfFfd4AfB6115b954Bd326cbe7B4BA576818f6",
        "USDT/USD": "0x3E7d1eAB13ad0104d2750B8863b489D65364e32D",
        "DAI/USD":  "0xAed0c38402a5d19df6E4c03F4E2DceD6e29c1ee9",
    }
    pools = {
        "USDC/USDT_univ3": {"address": "0x3416cf6c708da44db2624d63ea0aaef7113527c6", "type": "uniswap_v3", "symbol": "USDC/USDT"},
        "DAI/USDC_univ3":  {"address": "0x6c6bc977e13df9b0de53b251522280bb72383700",  "type": "uniswap_v3", "symbol": "DAI/USDC"},
        "3pool_curve":     {"address": "0xbebc44782c7db0a1a60cb6fe97d0b483032ff1c7",  "type": "curve",      "symbol": "USDT/USD"},
    }
    models = {"emb": "sentence-transformers/all-MiniLM-L6-v2"}
CFG = Config()

In [17]:
def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat(timespec="seconds")
def _iso_str(s) -> str:
    ts = pd.to_datetime(s, errors="coerce", utc=True)
    if isinstance(ts, pd.Series):
        return ts.dt.strftime("%Y-%m-%d %H:%M:%S%z").fillna("")
    return ts.strftime("%Y-%m-%d %H:%M:%S%z") if not pd.isna(ts) else ""

In [18]:
def rpc_health_check(rpc: str) -> bool:
    if not (_WEB3_OK and rpc):
        return False
    try:
        w3 = Web3(Web3.HTTPProvider(rpc, request_kwargs={"timeout": 6}))
        ok = w3.is_connected() if hasattr(w3, "is_connected") else w3.isConnected()
        return bool(ok)
    except Exception:
        return False
if os.getenv("ETH_RPC") and "<YOUR_KEY>" not in os.getenv("ETH_RPC") and rpc_health_check(os.getenv("ETH_RPC")):
    CFG.eth_rpc = os.getenv("ETH_RPC")
    CFG.mock_mode = False
else:
    CFG.mock_mode = True

In [19]:
_FONT_CANDIDATES = [
    r"C:\Windows\Fonts\DejaVuSans.ttf",
    r"C:\Windows\Fonts\DejaVuSansCondensed.ttf",
    r"C:\Windows\Fonts\seguiemj.ttf",
    r"C:\Windows\Fonts\seguisym.ttf",
    r"C:\Windows\Fonts\arialuni.ttf",
    r"C:\Windows\Fonts\SegoeUI.ttf",
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
]

In [20]:
def _find_unicode_font() -> Optional[str]:
    for p in _FONT_CANDIDATES:
        if os.path.exists(p):
            return p
    return None
UNICODE_FONT_PATH = _find_unicode_font()
UNICODE_FAMILY = "DepegUnicode"

In [21]:
def _sanitize_text(s: str) -> str:
    if s is None:
        return ""
    repl = {"—": "-", "–": "-", "…": "...", "•": "-", "“": '"', "”": '"', "’": "'", "‘": "'",
            "→": "->", "↑": "^", "↓": "v", "±": "+/-", "×": "x", "·": ".", "™": "(TM)", "®": "(R)", "©": "(C)"}
    for k, v in repl.items(): s = s.replace(k, v)
    return unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")

In [22]:
if _FPDF_OK:
    class PDF(FPDF, HTMLMixin): pass
    def _set_font_unicode(pdf: FPDF, size: int = 10, style: str = ""):
        if UNICODE_FONT_PATH:
            try:
                if UNICODE_FAMILY not in pdf.fonts:
                    pdf.add_font(UNICODE_FAMILY, "", UNICODE_FONT_PATH, uni=True)
                    pdf.add_font(UNICODE_FAMILY, "B", UNICODE_FONT_PATH, uni=True)
                pdf.set_font(UNICODE_FAMILY, style=style, size=size); return
            except Exception as e:
                print(f"[warn] Unicode font failed: {e}")
        pdf.set_font("Helvetica", style=style, size=size)
    def _init_pdf(title: str = "Depeg Sentinel Report") -> FPDF:
        pdf = PDF(); pdf.set_auto_page_break(auto=True, margin=15); pdf.add_page()
        _set_font_unicode(pdf, size=16, style="B")
        pdf.cell(0, 10, title if UNICODE_FONT_PATH else _sanitize_text(title), ln=1)
        _set_font_unicode(pdf, size=9); pdf.cell(0, 6, f"Generated: {_now_iso()}", ln=1); pdf.ln(2)
        return pdf
    def _h(pdf: FPDF, text: str, level: int = 2):
        sizes = {1: 14, 2: 12, 3: 11}
        _set_font_unicode(pdf, size=sizes.get(level, 11), style="B")
        pdf.multi_cell(0, 6, text if UNICODE_FONT_PATH else _sanitize_text(text)); pdf.ln(1)
    def _p(pdf: FPDF, text: str):
        _set_font_unicode(pdf, size=10, style="")
        pdf.multi_cell(0, 5, text if UNICODE_FONT_PATH else _sanitize_text(text)); pdf.ln(1)
    def _kv_table(pdf: FPDF, rows: List[Tuple[str, str]]):
        _set_font_unicode(pdf, size=10, style="")
        col_w = [45, pdf.w - pdf.l_margin - pdf.r_margin - 45]; th = 6
        for k, v in rows:
            _set_font_unicode(pdf, size=10, style="B"); pdf.cell(col_w[0], th, str(k if UNICODE_FONT_PATH else _sanitize_text(k)))
            _set_font_unicode(pdf, size=10, style=""); pdf.multi_cell(col_w[1], th, str(v if UNICODE_FONT_PATH else _sanitize_text(v)))
        pdf.ln(1)
    def export_analyst_note_pdf(note_text: str, risk_now=None, risk_10m=None, risk_30m=None,
                                contributors: Optional[List[str]] = None, freshness: Optional[str] = None,
                                confidence: Optional[str] = None,
                                out_path: Path = OUT / "analyst_note.pdf", title: str = "Analyst Note") -> Path:
        pdf = _init_pdf(title=title)
        metrics = []
        if risk_now is not None: metrics.append(("Risk Score (now)", f"{risk_now:.2f}"))
        if risk_10m is not None: metrics.append(("Risk Forecast (10m)", f"{risk_10m:.2f}"))
        if risk_30m is not None: metrics.append(("Risk Forecast (30m)", f"{risk_30m:.2f}"))
        if freshness: metrics.append(("Freshness", freshness))
        if confidence: metrics.append(("Confidence", confidence))
        if contributors: metrics.append(("Top Contributors", "; ".join(contributors[:3])))
        if metrics: _kv_table(pdf, metrics)
        _h(pdf, "Analyst Note", 2); _p(pdf, note_text)
        pdf.output(str(out_path)); print(f"[ok] Analyst Note exported → {out_path}")
        return out_path
    def export_markdown_pdf(md_text: str, out_path: Path = OUT / "report.pdf", title: str = "Depeg Sentinel Report") -> Path:
        pdf = _init_pdf(title=title)
        for raw in md_text.splitlines():
            line = raw.rstrip("\n")
            if   line.startswith("### "): _h(pdf, line[4:], 3)
            elif line.startswith("## "):  _h(pdf, line[3:], 2)
            elif line.startswith("# "):   _h(pdf, line[2:], 1)
            elif line.strip() == "": pdf.ln(1)
            else: _p(pdf, line)
        pdf.output(str(out_path)); print(f"[ok] Markdown PDF exported → {out_path}")
        return out_path
else:
    def export_analyst_note_pdf(*args, **kwargs):
        path = kwargs.get("out_path", OUT / "analyst_note.txt")
        Path(path).write_text(kwargs.get("note_text", "")); print(f"[ok] Analyst Note (plain) → {path}"); return path
    def export_markdown_pdf(md_text: str, out_path: Path = OUT / "report.txt", title: str = "Depeg Sentinel Report") -> Path:
        Path(out_path).write_text(md_text); print(f"[ok] Report (plain) → {out_path}"); return out_path

In [23]:
CANON_COLS = [
    "ts","pool","dex_spot","dex_twap","oracle_ratio","dev","dev_roll_std",
    "tvl_outflow_rate","virtual_price","spot_twap_gap_bps","r0","r1","r0_delta","r1_delta",
    "event_severity_max_24h","event_count_24h","feeds_fresh","run_quality_pass","model_version","block",
    "z_if","z_lof","z_ocsvm","z_cusum","z_ae","anom_fused",
    "y_10m",
    "neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best",
]

In [24]:
def ensure_live_schema(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    for col in CANON_COLS:
        if col not in df.columns: df[col] = np.nan
    return df[CANON_COLS]
def write_live(df: pd.DataFrame) -> Path:
    df2 = ensure_live_schema(df)
    df2["ts"] = _iso_str(df2["ts"])
    df2.to_csv(LIVE_CSV, index=False); return LIVE_CSV

In [25]:
def load_live(n: int | None = None) -> pd.DataFrame:
    if not LIVE_CSV.exists(): return pd.DataFrame(columns=CANON_COLS)
    df = pd.read_csv(LIVE_CSV, dtype=str)
    num_cols = [c for c in CANON_COLS if c not in ("ts","pool","model_version","block")]
    for c in num_cols:
        if c in df.columns: df[c] = pd.to_numeric(df[c], errors="coerce")
    if "ts" in df.columns:
        df["ts"] = pd.to_datetime(df["ts"], utc=True, errors="coerce")
    df = ensure_live_schema(df)
    return df.tail(n) if n else df
def _pct(a, b):
    if pd.isna(a) or pd.isna(b) or b == 0: return np.nan
    return (a - b) / b

In [26]:
@dataclass
class OnchainResult:
    ok: bool
    ts: str
    block: Optional[int]
    data: dict
    err: Optional[str] = None

In [27]:
ABI_AGG = json.loads("""[
 {"inputs":[],"name":"decimals","outputs":[{"internalType":"uint8","name":"","type":"uint8"}],"stateMutability":"view","type":"function"},
 {"inputs":[],"name":"latestRoundData","outputs":[
  {"internalType":"uint80","name":"roundId","type":"uint80"},
  {"internalType":"int256","name":"answer","type":"int256"},
  {"internalType":"uint256","name":"startedAt","type":"uint256"},
  {"internalType":"uint256","name":"updatedAt","type":"uint256"},
  {"internalType":"uint80","name":"answeredInRound","type":"uint80"}],
 "stateMutability":"view","type":"function"}]""")

In [28]:
class OnChainTool:
    def __init__(self, rpc: str, mock: bool = False):
        self.mock = mock or not _WEB3_OK
        self.w3 = None
        if not self.mock and _WEB3_OK and rpc:
            try:
                self.w3 = Web3(Web3.HTTPProvider(rpc, request_kwargs={"timeout": 8}))
                connected = self.w3.is_connected() if hasattr(self.w3, "is_connected") else self.w3.isConnected()
                if not connected:
                    print("[onchain] RPC not reachable → mock_mode"); self.mock = True
                else:
                    try:
                        net_id = self.w3.eth.chain_id
                        print(f"[onchain] connected. chain_id={net_id}")
                    except Exception:
                        print("[onchain] connected (chain_id check failed)")
            except Exception as e:
                print(f"[onchain] init error: {e} → mock_mode"); self.mock = True
        else:
            print("[onchain] mock_mode enabled")
    def _call(self, fn):
        return fn()
    def get_oracle_price(self, feed_addr: str) -> OnchainResult:
        ts = _now_iso()
        if self.mock or not feed_addr or feed_addr.lower().startswith("0x0000"):
            px = float(1.0 + np.random.normal(0, 1e-4))
            return OnchainResult(True, ts, None, {"price": px, "decimals": 8, "source": "mock"})
        try:
            feed = Web3.to_checksum_address(feed_addr)
            c = self.w3.eth.contract(address=feed, abi=ABI_AGG)
            decimals = int(self._call(lambda: c.functions.decimals().call()))
            rd = self._call(lambda: c.functions.latestRoundData().call())
            ans, upd = rd[1], int(rd[3])
            price = float(ans) / (10 ** decimals)
            block = self.w3.eth.block_number
            return OnchainResult(True, ts, block, {"price": price, "decimals": decimals, "updatedAt": upd, "source": "chainlink"})
        except Exception as e:
            return OnchainResult(False, ts, None, {}, str(e))
    def get_univ3_price(self, pool_addr: str) -> OnchainResult:
        ts = _now_iso()
        if self.mock or not pool_addr or pool_addr.lower().startswith("0x0000"):
            base = 1.0 + np.random.normal(0, 3e-4)
            return OnchainResult(True, ts, None, {"price": float(base), "sqrtPriceX96": None, "source": "mock"})
        try:
            _ABI_UNIV3_POOL = [
                {"name":"slot0","inputs":[],"outputs":[
                    {"type":"uint160","name":"sqrtPriceX96"},
                    {"type":"int24","name":"tick"},
                    {"type":"uint16","name":"observationIndex"},
                    {"type":"uint16","name":"observationCardinality"},
                    {"type":"uint16","name":"observationCardinalityNext"},
                    {"type":"uint8","name":"feeProtocol"},
                    {"type":"bool","name":"unlocked"}],
                 "stateMutability":"view","type":"function"},
                {"name":"token0","inputs":[],"outputs":[{"type":"address"}], "stateMutability":"view","type":"function"},
                {"name":"token1","inputs":[],"outputs":[{"type":"address"}], "stateMutability":"view","type":"function"},
            ]
            _ABI_ERC20_DEC = [
                {"name":"decimals","inputs":[],"outputs":[{"type":"uint8"}], "stateMutability":"view","type":"function"},
                {"name":"symbol","inputs":[],"outputs":[{"type":"string"}],   "stateMutability":"view","type":"function"},
            ]
            pool = Web3.to_checksum_address(pool_addr)
            c = self.w3.eth.contract(address=pool, abi=_ABI_UNIV3_POOL)
            slot0 = self._call(lambda: c.functions.slot0().call()); sqrtp = slot0[0]
            t0 = self._call(lambda: c.functions.token0().call()); t1 = self._call(lambda: c.functions.token1().call())
            t0c = self.w3.eth.contract(address=t0, abi=_ABI_ERC20_DEC)
            t1c = self.w3.eth.contract(address=t1, abi=_ABI_ERC20_DEC)
            d0 = int(self._call(lambda: t0c.functions.decimals().call()))
            d1 = int(self._call(lambda: t1c.functions.decimals().call()))
            p_raw = (sqrtp / (2**96))**2
            price = float(p_raw * (10 ** (d0 - d1)))
            block = self.w3.eth.block_number
            return OnchainResult(True, ts, block, {
                "price": price, "sqrtPriceX96": int(sqrtp),
                "token0": t0, "token1": t1, "decimals0": d0, "decimals1": d1, "source": "uniswap_v3"
            })
        except Exception as e:
            return OnchainResult(False, ts, None, {}, str(e))
    def get_curve_virtual_price(self, pool_addr: str) -> OnchainResult:
        ts = _now_iso()
        if self.mock or not pool_addr or pool_addr.lower().startswith("0x0000"):
            vp = 1.0 + np.random.normal(0, 2e-4)
            return OnchainResult(True, ts, None, {"virtual_price": float(vp), "source": "mock"})
        try:
            abi = [
                {"name":"get_virtual_price","inputs":[],"outputs":[{"type":"uint256"}],"stateMutability":"view","type":"function"},
                {"name":"virtual_price","inputs":[],"outputs":[{"type":"uint256"}],"stateMutability":"view","type":"function"},
            ]
            pool = Web3.to_checksum_address(pool_addr)
            c = self.w3.eth.contract(address=pool, abi=abi)
            def _try_vp():
                try: return int(c.functions.get_virtual_price().call())
                except Exception: return int(c.functions.virtual_price().call())
            raw = self._call(_try_vp)
            vp = raw / 1e18 if raw > 10**9 else raw
            block = self.w3.eth.block_number
            return OnchainResult(True, ts, block, {"virtual_price": float(vp), "source": "curve"})
        except Exception as e:
            return OnchainResult(False, ts, None, {}, str(e))
    def get_reserves_min(self, pool_addr: str) -> OnchainResult:
        ts = _now_iso()
        if self.mock or not pool_addr or pool_addr.lower().startswith("0x0000"):
            r0 = float(1e9 + np.random.randint(-5e6, 5e6))
            r1 = float(1e9 + np.random.randint(-5e6, 5e6))
            return OnchainResult(True, ts, None, {"r0": r0, "r1": r1, "source": "mock"})
        try:
            abi = [{"name":"getReserves","inputs":[],"outputs":[
                {"type":"uint112","name":"_reserve0"},{"type":"uint112","name":"_reserve1"},{"type":"uint32","name":"_blockTimestampLast"}],
                "stateMutability":"view","type":"function"}]
            pool = Web3.to_checksum_address(pool_addr)
            c = self.w3.eth.contract(address=pool, abi=abi)
            r0, r1, ts_last = self._call(lambda: c.functions.getReserves().call())
            block = self.w3.eth.block_number
            return OnchainResult(True, ts, block, {"r0": float(r0), "r1": float(r1), "ts_last": int(ts_last), "source": "getReserves"})
        except Exception as e:
            return OnchainResult(False, ts, None, {}, str(e))

In [29]:
onchain = OnChainTool(CFG.eth_rpc, mock=CFG.mock_mode)

[onchain] mock_mode enabled


In [30]:
class _TWAPCache:
    def __init__(self, alpha: float = 0.2): self.alpha = alpha; self.state: Dict[str, float] = {}
    def update(self, pool: str, spot: float) -> float:
        if np.isnan(spot): return np.nan
        prev = self.state.get(pool, spot); twap = self.alpha*spot + (1-self.alpha)*prev
        self.state[pool] = twap; return twap
_TWAP = _TWAPCache(alpha=0.2)

In [31]:
def _last_per_pool(col: str) -> dict[str, float]:
    df = load_live(5000)
    if df.empty or col not in df.columns: return {}
    return df.sort_values("ts").groupby("pool")[col].last().to_dict()

In [32]:
def engineer_features(raw_rows: List[dict]) -> pd.DataFrame:
    df = pd.DataFrame(raw_rows)
    if df.empty: return df
    if "dex_spot" in df.columns and "virtual_price" in df.columns:
        df["dex_spot"] = pd.to_numeric(df["dex_spot"], errors="coerce")
        df["virtual_price"] = pd.to_numeric(df["virtual_price"], errors="coerce")
        df["dex_spot"] = df["dex_spot"].where(pd.notna(df["dex_spot"]), df["virtual_price"])
    df["oracle_px"] = pd.to_numeric(df["oracle_px"], errors="coerce")
    df["oracle_ratio"] = df["dex_spot"] / df["oracle_px"]
    df["dev"]          = (df["dex_spot"] - df["oracle_px"]) / df["oracle_px"]
    df["dex_twap"] = [ _TWAP.update(p, float(s) if pd.notna(s) else np.nan) for p, s in zip(df["pool"], df["dex_spot"]) ]
    df["spot_twap_gap_bps"] = (df["dex_spot"] - df["dex_twap"]) / df["dex_twap"].replace(0,np.nan) * 1e4
    df["spot_twap_gap_bps"] = df["spot_twap_gap_bps"].replace([np.inf,-np.inf], np.nan)
    last_r0 = _last_per_pool("r0"); last_r1 = _last_per_pool("r1")
    df["r0_delta"] = [np.nan if np.isnan(r) else (r - last_r0.get(p, r)) for p, r in zip(df["pool"], df["r0"])]
    df["r1_delta"] = [np.nan if np.isnan(r) else (r - last_r1.get(p, r)) for p, r in zip(df["pool"], df["r1"])]
    df["tvl_outflow_rate"] = [
        _pct((r0 or 0)+(r1 or 0), (last_r0.get(p, r0) or 0)+(last_r1.get(p, r1) or 0))
        for p, r0, r1 in zip(df["pool"], df["r0"], df["r1"])
    ]
    hist = load_live(1000)
    if not hist.empty:
        hist = pd.concat([hist, df], ignore_index=True).sort_values(["pool","ts"])
        hist["dev_roll_std"] = hist.groupby("pool")["dev"].rolling(20, min_periods=5).std().reset_index(level=0, drop=True)
        df = df.merge(hist.loc[hist["ts"].isin(df["ts"])][["ts","pool","dev_roll_std"]], on=["ts","pool"], how="left")
    else:
        df["dev_roll_std"] = np.nan
    df["event_severity_max_24h"] = df.get("event_severity_max_24h", pd.Series([0]*len(df)))
    df["event_count_24h"]        = df.get("event_count_24h", pd.Series([0]*len(df)))
    df["feeds_fresh"]            = True
    df["run_quality_pass"]       = df[["dex_spot","oracle_px","dev"]].notna().all(axis=1)
    df["model_version"]          = "v2.0.0"
    if "block" not in df.columns: df["block"] = None
    return ensure_live_schema(df)

In [33]:
def sample_once() -> pd.DataFrame:
    rows = []
    ts_now = _now_iso()
    for pool_name, meta in CFG.pools.items():
        sym  = meta.get("symbol", "USDC/USD"); addr = meta["address"]; typ  = meta["type"]
        uni = onchain.get_univ3_price(addr) if typ=="uniswap_v3" else None
        cur = onchain.get_curve_virtual_price(addr) if typ=="curve" else None
        ora = onchain.get_oracle_price(CFG.chainlink_feeds.get(sym, ""))
        dex_spot   = (uni.data.get("price") if uni and uni.ok else np.nan) if typ=="uniswap_v3" else np.nan
        virt_price = (cur.data.get("virtual_price") if cur and cur.ok else np.nan) if typ=="curve" else np.nan
        oracle_px  = (ora.data.get("price") if ora and ora.ok else np.nan)
        res = onchain.get_reserves_min(addr)
        r0 = float(res.data.get("r0")) if res and res.ok else np.nan
        r1 = float(res.data.get("r1")) if res and res.ok else np.nan
        rows.append({"ts": ts_now, "pool": pool_name, "dex_spot": float(dex_spot) if pd.notna(dex_spot) else np.nan,
                     "oracle_px": float(oracle_px) if pd.notna(oracle_px) else np.nan, "virtual_price": float(virt_price) if pd.notna(virt_price) else np.nan,
                     "r0": r0, "r1": r1, "block": None})
    feat = engineer_features(rows); append_live(feat); return feat

In [34]:
def _scale_01(x: np.ndarray) -> np.ndarray:
    a = np.nanmin(x); b = np.nanmax(x)
    if not np.isfinite(a) or not np.isfinite(b) or b-a == 0: return np.zeros_like(x)
    return (x - a) / (b - a)

In [35]:
def _cusum_score(dev: pd.Series, w: int = 30, k: float = 0.0) -> pd.Series:
    s = dev.fillna(0).astype(float).to_numpy()
    g_pos = np.zeros_like(s, dtype=float); g_neg = np.zeros_like(s, dtype=float)
    for i in range(1, len(s)):
        g_pos[i] = max(0.0, g_pos[i-1] + (s[i] - k))
        g_neg[i] = min(0.0, g_neg[i-1] + (s[i] + k))
    mag = np.abs(g_pos) + np.abs(g_neg)
    out = pd.Series(mag, index=dev.index)
    out_rolled = out.rolling(w, min_periods=5).apply(lambda z: z[-1] / (np.nanmax(np.abs(z))+1e-9), raw=True)
    return out_rolled.fillna(0.0)

In [36]:
def _autoencoder_scores(df_feat: pd.DataFrame, X: np.ndarray) -> np.ndarray:
    if not _TORCH_OK: return np.zeros(len(df_feat))
    inp = X.astype(np.float32); d = inp.shape[1]
    class AE(nn.Module):
        def __init__(self, d):
            super().__init__()
            self.enc = nn.Sequential(nn.Linear(d, max(4, d//2)), nn.ReLU(), nn.Linear(max(4, d//2), max(2, d//4)))
            self.dec = nn.Sequential(nn.Linear(max(2, d//4), max(4, d//2)), nn.ReLU(), nn.Linear(max(4, d//2), d))
        def forward(self, x): return self.dec(self.enc(x))
    model = AE(d); opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    ds = TensorDataset(torch.from_numpy(inp)); dl = DataLoader(ds, batch_size=64, shuffle=True)
    model.train()
    for _ in range(5):
        for (xb,) in dl:
            opt.zero_grad(); loss = ((model(xb) - xb)**2).mean(); loss.backward(); opt.step()
    model.eval()
    with torch.no_grad(): rec = model(torch.from_numpy(inp)).numpy()
    return _scale_01(np.mean((rec - inp)**2, axis=1))

In [37]:
def run_anomaly_zoo_update_live(features_cols: List[str] | None = None) -> pd.DataFrame:
    df = load_live()
    if df.empty or df["dev"].isna().all():
        print("[zoo] live dataset empty; sample first."); return df
    if not features_cols:
        features_cols = ["dev", "dev_roll_std", "tvl_outflow_rate", "spot_twap_gap_bps", "oracle_ratio"]
    use_cols = [c for c in features_cols if c in df.columns]
    if not use_cols: raise ValueError("No usable feature columns found in live dataset.")
    df_proc = df.copy()
    X = df_proc[use_cols].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0).to_numpy()
    scaler = StandardScaler() if _SK_OK else None; Xs = scaler.fit_transform(X) if scaler is not None else X
    z_if = np.zeros(len(df_proc)); z_lof = np.zeros(len(df_proc)); z_ocsvm = np.zeros(len(df_proc))
    if _SK_OK:
        z_if = _scale_01(-IsolationForest(n_estimators=200, contamination="auto", random_state=SEED).fit(Xs).score_samples(Xs))
        try:
            lof2 = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(Xs)
            z_lof = _scale_01(-lof2.score_samples(Xs))
        except Exception:
            lof = LocalOutlierFactor(n_neighbors=20, novelty=False)
            z_lof = _scale_01((-lof.fit_predict(Xs)).astype(float))
        try:
            oc = OneClassSVM(kernel="rbf", gamma="scale", nu=0.05)
            z_ocsvm = _scale_01(-oc.fit(Xs).decision_function(Xs))
        except Exception:
            z_ocsvm = np.zeros(len(df_proc))
    z_cusum = []
    for _, g in df_proc.sort_values(["pool","ts"]).groupby("pool", sort=False):
        z_cusum.append(_cusum_score(g["dev"]))
    z_cusum = _scale_01(pd.concat(z_cusum).reindex(df_proc.index).fillna(0.0).to_numpy())
    try:
        z_ae = _autoencoder_scores(df_proc, Xs)
    except Exception:
        z_ae = np.zeros(len(df_proc))
    fused = np.nanmax(np.vstack([z_if, z_lof, z_ocsvm, z_cusum, z_ae]), axis=0)
    for k, arr in [("z_if", z_if), ("z_lof", z_lof), ("z_ocsvm", z_ocsvm), ("z_cusum", z_cusum), ("z_ae", z_ae), ("anom_fused", fused)]:
        df_proc[k] = arr
    write_live(df_proc); print("[zoo] anomaly scores updated"); return df_proc.tail(10)

In [38]:
def _time_split_idx(ts: pd.Series, frac_train: float = 0.70):
    order = np.argsort(ts.fillna(pd.Timestamp.utcnow()).values)
    n = len(order); cut = int(n*frac_train)
    return order[:cut], order[cut:]

In [39]:
def label_targets(df: pd.DataFrame, horizon: int = 10, dev_thr: float = 0.005,
                  fused_col: str | None = None, fused_thr: float = 0.90) -> pd.Series:
    df = df.sort_values(["pool","ts"]).reset_index(drop=True)
    y = pd.Series(0, index=df.index)
    by = df.groupby("pool")
    for pool, g in by:
        g_idx = g.index.to_list()
        g_dev = g["dev"].abs().to_numpy()
        fused = g[fused_col].to_numpy() if fused_col and fused_col in g.columns else None
        n = len(g_idx)
        for i in range(n):
            j1, j2 = i+1, min(n, i+1+horizon)
            if j1 >= j2: continue
            cond_dev = np.nanmax(g_dev[j1:j2])
            cond_fused = np.nanmax(fused[j1:j2]) if fused is not None else 0.0
            if (cond_dev >= dev_thr) or (fused is not None and cond_fused >= fused_thr):
                y.iloc[g_idx[i]] = 1
    return y

In [40]:
def _ensure_labels(df: pd.DataFrame, base_thr=0.005) -> pd.Series:
    y = label_targets(df, horizon=10, dev_thr=base_thr, fused_col="anom_fused" if "anom_fused" in df.columns else None)
    if y.nunique() > 1: return y
    y2 = label_targets(df, horizon=10, dev_thr=max(0.001, base_thr/5), fused_col="anom_fused" if "anom_fused" in df.columns else None)
    if y2.nunique() > 1: return y2
    af = df["anom_fused"].replace([np.inf,-np.inf], np.nan).fillna(0.0)
    k = max(1, int(0.05*len(af))); thr = np.partition(af.values, -k)[-k]
    return (af >= thr).astype(int)

In [41]:
def train_forecaster_10m(feature_cols: List[str] | None = None, label_col: str = "y_10m"):
    if not _XGB_OK or not _SK_OK: raise RuntimeError("xgboost + scikit-learn required to train forecaster.")
    df = load_live().copy()
    if df.empty: raise ValueError("live_dataset is empty. sample more rows first.")
    if label_col not in df.columns or df[label_col].nunique(dropna=True) < 2:
        df[label_col] = _ensure_labels(df); write_live(df)
    if feature_cols is None:
        feature_cols = ["dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio","anom_fused","r0_delta","r1_delta","event_severity_max_24h","event_count_24h"]
    use_cols = [c for c in feature_cols if c in df.columns]
    X = df[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    y = pd.to_numeric(df[label_col], errors="coerce").fillna(0).astype(int).to_numpy()
    if len(np.unique(y)) < 2:
        df[label_col] = _ensure_labels(df); write_live(df)
        y = pd.to_numeric(df[label_col], errors="coerce").fillna(0).astype(int).to_numpy()
    ts = df["ts"]; tr_idx, te_idx = _time_split_idx(ts, 0.70)
    Xtr, Xte = X[tr_idx], X[te_idx]; ytr, yte = y[tr_idx], y[te_idx]
    clf = xgb.XGBClassifier(n_estimators=250, max_depth=4, learning_rate=0.06,
                            subsample=0.9, colsample_bytree=0.8,
                            objective="binary:logistic", eval_metric="logloss",
                            random_state=SEED, n_jobs=0)
    clf.fit(Xtr, ytr)
    calib = None
    unique, counts = np.unique(ytr, return_counts=True)
    min_cls = min(counts) if len(counts)==2 else 0
    if min_cls >= 3:
        try:
            calib = CalibratedClassifierCV(clf, method="isotonic", cv=3); calib.fit(Xtr, ytr)
        except Exception: calib = None
    if calib is None and min_cls >= 2:
        try:
            calib = CalibratedClassifierCV(clf, method="sigmoid", cv=2); calib.fit(Xtr, ytr)
        except Exception: calib = None
    def _proba(model, X_): return (model or clf).predict_proba(X_)[:,1]
    ap, bs = float("nan"), float("nan")
    if len(np.unique(yte))>1:
        p_te = _proba(calib, Xte)
        try: ap = average_precision_score(yte, p_te)
        except Exception: pass
        try: bs = float(brier_score_loss(yte, p_te))
        except Exception: bs = float("nan")
    print(f"[forecast10m] AP={ap if pd.notna(ap) else float('nan'):.3f}  Brier={bs if pd.notna(bs) else float('nan'):.3f}  n_te={len(yte)}  calib={'iso3' if (calib and getattr(calib,'method','')=='isotonic') else ('sig2' if calib else 'none')}")
    dump(clf, FORECAST_10M_PATH); 
    if calib: dump(calib, CALIB_10M_PATH)
    elif CALIB_10M_PATH.exists(): CALIB_10M_PATH.unlink()
    tail = df.tail(6).copy()
    X_tail = tail[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    tail["risk_forecast_10m"] = _proba(calib, X_tail)
    return tail[["ts","pool","anom_fused","risk_forecast_10m"]]

In [42]:
def _load_forecaster():
    clf = load(FORECAST_10M_PATH) if FORECAST_10M_PATH.exists() else None
    calib = load(CALIB_10M_PATH) if CALIB_10M_PATH.exists() else None
    return clf, calib
def _proba_model(clf, calib, X: np.ndarray) -> np.ndarray:
    model = calib if calib is not None else clf
    if model is None: raise RuntimeError("No 10m forecaster found. Run train_forecaster_10m() first.")
    return model.predict_proba(X)[:, 1]

In [43]:
def score_latest_10m(n_tail: int = 60, feature_cols: Sequence[str] | None = None, write_parquet: bool = True) -> pd.DataFrame:
    df = load_live()
    if df.empty: raise ValueError("live_dataset is empty. Run sample_once() first.")
    if feature_cols is None:
        feature_cols = ["dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio","anom_fused","r0_delta","r1_delta","event_severity_max_24h","event_count_24h"]
    use_cols = [c for c in feature_cols if c in df.columns]
    X_tail = df.tail(n_tail)[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    clf, calib = _load_forecaster(); p = _proba_model(clf, calib, X_tail)
    out = df.tail(n_tail).copy(); out["risk_forecast_10m"] = p
    out = out[["ts","pool","anom_fused","risk_forecast_10m"]]
    if write_parquet:
        try:
            out.to_parquet(FORECAST_10M_PARQUET, index=False); print(f"[forecast10m] wrote {FORECAST_10M_PARQUET}")
        except Exception as e:
            print(f"[warn] parquet write failed: {e}")
    return out.tail(10)

In [44]:
def explain_forecast_10m(feature_cols: Sequence[str] | None = None, n_repeats: int = 8) -> dict:
    if not _SK_OK:
        payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
        EXPLAIN_JSON.write_text(json.dumps(payload, indent=2)); print(f"[explain] wrote {EXPLAIN_JSON}"); return payload
    df = load_live()
    if df.empty: raise ValueError("live_dataset is empty.")
    if "y_10m" not in df.columns or df["y_10m"].nunique() < 2:
        df["y_10m"] = _ensure_labels(df); write_live(df)
    if feature_cols is None:
        feature_cols = ["dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio","anom_fused","r0_delta","r1_delta","event_severity_max_24h","event_count_24h"]
    use_cols = [c for c in feature_cols if c in df.columns]
    X = df[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    y = df["y_10m"].astype(int).to_numpy()
    tr_idx, te_idx = _time_split_idx(df["ts"], 0.70)
    Xte, yte = X[te_idx], y[te_idx]
    clf, calib = _load_forecaster()
    if clf is None and calib is None:
        _ = train_forecaster_10m(feature_cols=use_cols); clf, calib = _load_forecaster()
    model = calib if calib is not None else clf
    if len(np.unique(yte)) < 2:
        payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
    else:
        r = permutation_importance(model, Xte, yte, n_repeats=n_repeats, scoring="average_precision", random_state=SEED)
        imp = sorted(zip(use_cols, r.importances_mean, r.importances_std), key=lambda z: z[1], reverse=True)
        top3 = [f"{name} (+{mean:.4f}±{std:.4f} AP)" for name, mean, std in imp[:3]]
        payload = {"ts": _now_iso(), "top_contributors": top3, "all_features": [{"feature": n, "mean": float(m), "std": float(s)} for n,m,s in imp]}
    EXPLAIN_JSON.write_text(json.dumps(payload, indent=2)); print(f"[explain] wrote {EXPLAIN_JSON}"); return payload

In [45]:
@dataclass
class PolicyDecision:
    level: str
    actions: list
    rationale: str
    requires_ack: bool

In [46]:
class SeverityModel:
    def __init__(self, emb_model_name: Optional[str] = None, device: Optional[str] = None):
        self.model = None; self.emb = None; self.dim = 128
        self.emb_name = emb_model_name or "sentence-transformers/all-MiniLM-L6-v2"
        try:
            from sklearn.linear_model import LogisticRegression as _LR
            self._LR = _LR; self._sk_ok = True
        except Exception as e:
            self._LR = None; self._sk_ok = False; warnings.warn(f"[note] scikit-learn unavailable: {e}. Using prior-only severity.")
        try:
            from sentence_transformers import SentenceTransformer as _SB
            kwargs = {}; 
            if device: kwargs["device"] = device
            self.emb = _SB(self.emb_name, **kwargs)
            try: self.dim = int(self.emb.get_sentence_embedding_dimension())
            except Exception: pass
        except Exception as e:
            self.emb = None; warnings.warn(f"[note] sentence-transformers unavailable: {e}. Using zero embeddings.")
    def _embed(self, texts: List[str]) -> np.ndarray:
        texts = [t if isinstance(t, str) else "" for t in texts]
        if self.emb is None: return np.zeros((len(texts), self.dim), dtype=np.float32)
        try: return self.emb.encode(texts, normalize_embeddings=True)
        except Exception as e:
            warnings.warn(f"[note] embedding failed: {e}. Returning zeros.")
            return np.zeros((len(texts), self.dim), dtype=np.float32)
    def fit(self, texts: List[str], labels: List[int]):
        if not texts: warnings.warn("[warn] fit called with empty texts; skipping."); return self
        if len(texts) != len(labels): raise ValueError("texts and labels must have the same length")
        y = np.asarray(labels, dtype=int)
        if self.emb is None:
            p = float(np.mean(y)) if y.size else 0.0; self.model = {"prior": max(0.0, min(1.0, p))}
            warnings.warn("[note] sentence-transformers missing; using prior-only severity."); return self
        uniq = np.unique(y)
        if not getattr(self, "_sk_ok", False) or uniq.size < 2:
            p = float(np.mean(y)) if y.size else 0.0; self.model = {"prior": max(0.0, min(1.0, p))}
            if not getattr(self, "_sk_ok", False): warnings.warn("[note] sklearn missing; using prior-only severity.")
            elif uniq.size < 2: warnings.warn("[note] one-class labels; using prior-only severity.")
            return self
        X = self._embed(texts)
        try:
            clf = self._LR(max_iter=200); clf.fit(X, y); self.model = clf
        except Exception as e:
            p = float(np.mean(y)); self.model = {"prior": max(0.0, min(1.0, p))}
            warnings.warn(f"[note] LR fit failed: {e}. Using prior-only severity.")
        return self
    def predict_proba(self, texts: List[str]) -> List[float]:
        if self.model is None: return [0.0]*len(texts)
        if isinstance(self.model, dict) and "prior" in self.model:
            p = float(self.model["prior"]); return [p]*len(texts)
        X = self._embed(texts)
        try:
            proba = self.model.predict_proba(X)[:, -1]; return np.clip(proba, 0.0, 1.0).tolist()
        except Exception as e:
            warnings.warn(f"[note] predict_proba failed: {e}. Returning zeros."); return [0.0]*len(texts)
    def predict(self, texts: List[str], threshold: float = 0.5) -> List[int]:
        p = self.predict_proba(texts); return [int(x >= float(threshold)) for x in p]

In [47]:
def _load_events() -> list[dict]:
    try:
        if EVENTS_JSON.exists():
            return json.loads(EVENTS_JSON.read_text())
    except Exception as e:
        print(f"[events] load failed: {e}")
    return []
def _save_events(ev: list[dict]) -> None:
    try:
        EVENTS_JSON.write_text(json.dumps(ev, indent=2))
    except Exception as e:
        print(f"[events] save failed: {e}")

In [48]:
def _http_get_json(url: str, timeout: int = 10) -> dict | list | None:
    try:
        import requests
        r = requests.get(url, timeout=timeout)
        if r.status_code == 200:
            try:
                return r.json()
            except Exception:
                return {"text": r.text}
        else:
            print(f"[offchain] non-200 from {url}: {r.status_code}")
    except Exception as e:
        print(f"[offchain] fetch error {url}: {e}")
    return None

In [49]:
def fetch_curve_status_min() -> list[dict]:
    """Minimal status fetch from Curve status page → single normalized event."""
    data = _http_get_json("https://status.curve.fi/api/v2/status.json")
    out = []
    now = _now_iso()
    if isinstance(data, dict) and "status" in data:
        summary = data["status"].get("description") or data["status"].get("indicator", "")
        out.append({
            "type": "status_update",
            "severity": 2,
            "ts": now,
            "summary": f"Curve status: {summary}",
            "source": "https://status.curve.fi"
        })
    return out

In [50]:
def fetch_curve_forum_min() -> list[dict]:
    """Placeholder/governance ping (keep it simple; real crawler can replace)."""
    return [{
        "type": "governance_vote",
        "severity": 1,
        "ts": _now_iso(),
        "summary": "Forum chatter: parameter tweak discussion (placeholder).",
        "source": "https://gov.curve.fi/"
    }]

In [51]:
def update_events_from_sources() -> list[dict]:
    """Fetch events from a few off-chain sources, dedupe, persist to EVENTS_JSON."""
    ev = _load_events()
    try:
        ev += fetch_curve_status_min()
    except Exception as e:
        print(f"[offchain] curve status skipped: {e}")
    try:
        ev += fetch_curve_forum_min()
    except Exception as e:
        print(f"[offchain] curve forum skipped: {e}")
    uniq = {}
    for e in ev:
        key = (str(e.get("summary", ""))[:120], str(e.get("source", "")), (str(e.get("ts", "")) or "")[:16])
        if key not in uniq:
            uniq[key] = {
                "type": str(e.get("type", "information")),
                "severity": int(e.get("severity", 1)),
                "ts": str(e.get("ts") or _now_iso()),
                "summary": str(e.get("summary", ""))[:280],
                "source": str(e.get("source", "")),
            }
    ev2 = list(uniq.values())
    _save_events(ev2)
    print(f"[offchain] events.json updated: {len(ev2)} events")
    return ev2

In [52]:
try:
    SEV_MODEL  
except NameError:
    try:
        SEV_MODEL = SeverityModel()
    except Exception:
        class _DummySev:
            def fit(self, *_, **__): return self
            def predict(self, texts, threshold: float = 0.5): return [1 if t else 0 for t in texts]
        SEV_MODEL = _DummySev()




In [53]:
def enrich_events_and_aggregate():
    """
    - Loads EVENTS_JSON (creates a tiny mock if empty)
    - Fits severity model and writes back 'severity' labels
    - Updates live tail rows per pool with event_count_24h & event_severity_max_24h
    """
    ev = _load_events()
    if not ev:
        ev = [{
            "type": "status_update",
            "severity": 2,
            "ts": _now_iso(),
            "summary": "noisy markets on stables; monitoring.",
            "source": "mock://status"
        }]
        _save_events(ev)
    texts = [e.get("summary", "") for e in ev]
    y_boot = [int(e.get("severity", 2)) for e in ev]  
    try:
        SEV_MODEL.fit(texts, y_boot)
        sev_pred = SEV_MODEL.predict(texts)
    except Exception as e:
        print(f"[note] severity fit skipped: {e}")
        sev_pred = [int(bool(y)) for y in y_boot]
    for i, s in enumerate(sev_pred):
        try:
            ev[i]["severity"] = int(s)
        except Exception:
            ev[i]["severity"] = int(y_boot[i]) if i < len(y_boot) else 1
    _save_events(ev)
    df = load_live()
    if df.empty:
        print("[enrich] live empty; nothing to write")
        return None
    max_sev = int(max([e.get("severity", 0) for e in ev]) if ev else 0)
    cnt = int(len(ev))
    df_latest = df.sort_values("ts").groupby("pool", as_index=False).tail(1).copy()
    df_latest["event_severity_max_24h"] = max_sev
    df_latest["event_count_24h"] = cnt
    live = load_live()
    live["ts_str"] = _iso_str(live["ts"])
    df_latest["ts_str"] = _iso_str(df_latest["ts"])
    live_idx = live.set_index(["pool", "ts_str"])
    upd_idx  = df_latest.set_index(["pool", "ts_str"])[["event_severity_max_24h", "event_count_24h"]]
    live_idx.update(upd_idx)
    live_updated = live_idx.reset_index().drop(columns=["ts_str"])
    write_live(live_updated)
    print("[enrich] wrote event aggregates to live tail rows")
    return df_latest[["ts","pool","event_severity_max_24h","event_count_24h"]]

In [54]:
def build_analyst_note_v2() -> dict:
    """
    Creates an analyst note PDF with:
      - current fused anomaly (risk_now)
      - 10-min forecast probability
      - top contributors from explain.json
      - cross-pool propagation cue
      - freshness + confidence band
    """
    tail = load_live().sort_values("ts").groupby("pool").tail(1)
    if tail.empty:
        return {"ok": False, "reason": "no data"}
    try:
        explain = json.loads(EXPLAIN_JSON.read_text()) if EXPLAIN_JSON.exists() else explain_forecast_10m()
        top3 = explain.get("top_contributors", [])[:3]
    except Exception:
        top3 = []
    cue = None
    try:
        net = compute_network_features()
        if net is not None and not net.empty:
            nrow = net.sort_values("neighbor_avg_anom", ascending=False).iloc[0]
            lead_str = "leads" if nrow["lead_lag_best"] > 0 else ("lags" if nrow["lead_lag_best"] < 0 else "co-moves")
            cue = f"{nrow['pool']} {lead_str} peers (corr={nrow['corr_best']:.2f})."
    except Exception as e:
        print(f"[note] network features skipped in note: {e}")
    risk_now, p10 = 0.0, 0.0
    try:
        scored = score_latest_10m(n_tail=len(tail), write_parquet=False)
        if not scored.empty:
            rmax = scored.sort_values("risk_forecast_10m", ascending=False).iloc[0]
            risk_now = float(rmax.get("anom_fused", 0.0))
            p10 = float(rmax.get("risk_forecast_10m", 0.0))
    except Exception as e:
        print(f"[note] forecast in note skipped: {e}")
    band = "High" if p10 >= 0.60 or risk_now >= 0.90 else ("Medium" if p10 >= 0.35 or risk_now >= 0.70 else "Low")
    freshness = "Fresh" if bool(tail["feeds_fresh"].astype(bool).iloc[-1]) else "Stale"
    note_lines = [
        f"Fused anomaly now={risk_now:.2f}; 10-min risk={p10:.2f}. Confidence {band}.",
    ]
    if top3:
        note_lines.append("Top drivers: " + "; ".join(top3))
    if cue:
        note_lines.append("Propagation: " + cue)
    note_lines.append("Action: monitor in 5m; if risk rises above 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).")
    note = " ".join(note_lines)[:900]
    pdf_path = export_analyst_note_pdf(
        note_text=note,
        risk_now=risk_now,
        risk_10m=p10,
        risk_30m=None,
        contributors=top3,
        freshness=freshness,
        confidence=band,
        out_path=OUT / "analyst_note.pdf",
        title="Analyst Note v2"
    )
    return {"ok": True, "note": note, "pdf": str(pdf_path)}

In [55]:
def nightly_report() -> dict:
    """
    Nightly Markdown report:
      - winner detector by PR-AUC proxy
      - 10m forecast calibration bins
      - incident (red) summary
    Exports to PDF as well.
    """
    df = load_live()
    if df.empty:
        return {"ok": False, "markdown": "[report] no data"}
    thr = 0.003
    labels = (df["dev"].abs() >= thr).astype(int).values
    scores = {c: df[c].fillna(0.0).values for c in ["z_if","z_lof","z_ocsvm","z_cusum","z_ae","anom_fused"] if c in df.columns}
    ap = {}
    if _SK_OK and labels.sum() > 0 and labels.sum() < len(labels):
        try:
            from sklearn.metrics import average_precision_score as AP
            for k, s in scores.items():
                ap[k] = AP(labels, s)
        except Exception as e:
            print(f"[report] AP calc skipped: {e}")
    winner = max(ap.items(), key=lambda x: x[1])[0] if ap else "anom_fused"
    if FORECAST_10M_PARQUET.exists():
        try:
            f = pd.read_parquet(FORECAST_10M_PARQUET)
            if "y_10m" not in f.columns:
                f = f.merge(df[["ts","pool","y_10m"]] if "y_10m" in df.columns else pd.DataFrame(), on=["ts","pool"], how="left")
        except Exception as e:
            print(f"[report] parquet read failed: {e}")
            f = score_latest_10m(write_parquet=False)
            f["y_10m"] = _ensure_labels(df)
    else:
        f = score_latest_10m(write_parquet=False)
        if "y_10m" not in df.columns:
            df["y_10m"] = _ensure_labels(df); write_live(df)
        f["y_10m"] = _ensure_labels(df)
    bins = np.linspace(0, 1, 6)
    f["bin"] = np.digitize(f["risk_forecast_10m"].fillna(0.0), bins)
    calib = f.groupby("bin", dropna=False).agg(
        p=("risk_forecast_10m", "mean"),
        y=("y_10m", "mean"),
        n=("y_10m", "size")
    ).reset_index(drop=True)
    try:
        recent = df.sort_values("ts").groupby("pool").tail(min(100, len(df)))
        dec = decide_latest(n_tail=min(100, len(recent)))
        reds = dec[dec["level"] == "red"] if isinstance(dec, pd.DataFrame) else pd.DataFrame()
    except Exception as e:
        print(f"[report] decide_latest failed: {e}")
        reds = pd.DataFrame()
    md = []
    md.append("# Nightly Model Report")
    md.append(f"Generated: {_now_iso()}\n")
    md.append("## Winner Detector Today")
    md.append(f"- **Winner:** `{winner}`  (by AP proxy on |dev|≥0.3%)\n")
    if ap:
        md.append("AP scores:")
        for k, v in sorted(ap.items(), key=lambda z: z[1], reverse=True):
            md.append(f"- {k}: {v:.3f}")
        md.append("")
    md.append("## Forecast Calibration (10m)")
    for _, r in calib.iterrows():
        p = r["p"] if pd.notna(r["p"]) else 0.0
        y = r["y"] if pd.notna(r["y"]) else 0.0
        md.append(f"- bin≈{p:.2f}: observed {y:.2f} (n={int(r['n'])})")
    md.append("")
    md.append("## Incidents (last window)")
    if isinstance(reds, pd.DataFrame) and not reds.empty:
        for _, r in reds.iterrows():
            an = r.get("anom_fused", np.nan)
            p10 = r.get("risk_forecast_10m", np.nan)
            md.append(f"- {r['ts']} | {r['pool']} | fused={an:.2f} | p10={p10:.2f}")
    else:
        md.append("- None")
    md_text = "\n".join(md)
    pdf_path = export_markdown_pdf(md_text, OUT / "report.pdf", title="Depeg Sentinel — Nightly Report")
    return {"ok": True, "markdown": md_text[:800] + "...", "pdf": str(pdf_path)}

In [56]:
def main_demo():
    print(f"[env] Output base: {OUT}")
    print(f"[chain] mock_mode={CFG.mock_mode} rpc={CFG.eth_rpc}")
    sample_once()
    run_anomaly_zoo_update_live()
    df = load_live()
    if "y_10m" not in df.columns or df["y_10m"].nunique() < 2:
        df["y_10m"] = _ensure_labels(df); write_live(df)
    if _XGB_OK and _SK_OK:
        try:
            train_forecaster_10m()
            score_latest_10m()
            explain_forecast_10m()
        except Exception as e:
            print(f"[note] forecaster step skipped: {e}")
    else:
        print("[note] skipping forecaster (xgboost or sklearn missing)")
    compute_network_features()
    enrich_events_and_aggregate()
    try:
        dec = decide_latest(12); print(dec.tail(3))
    except Exception as e:
        print(f"[note] decide_latest skipped: {e}")
    print(build_analyst_note_v2())
    print(nightly_report())

In [57]:
def write_run_meta(extra: dict | None = None, path: Path = OUT / "RUN_META.json") -> Path:
    meta = {
        "ts": _now_iso(),
        "versions": {"python": sys.version.split()[0],
                     "numpy": getattr(np, "__version__", None),
                     "pandas": getattr(pd, "__version__", None),
                     "sklearn": None, "xgboost": None, "torch": None, "web3": None},
        "config": {"eth_rpc": CFG.eth_rpc, "mock_mode": CFG.mock_mode, "lookback": CFG.lookback, "pools": list(CFG.pools.keys())},
        "artifacts": {"live_csv": str(LIVE_CSV), "forecast_parquet": str(FORECAST_10M_PARQUET), "explain_json": str(EXPLAIN_JSON), "events_json": str(EVENTS_JSON)},
        "counts": {},
    }
    try: import sklearn; meta["versions"]["sklearn"] = sklearn.__version__
    except Exception: pass
    try: import xgboost as _x; meta["versions"]["xgboost"] = _x.__version__
    except Exception: pass
    try: import torch as _t; meta["versions"]["torch"] = _t.__version__
    except Exception: pass
    try: from web3 import __version__ as _w3v; meta["versions"]["web3"] = _w3v
    except Exception: pass
    df = load_live(); meta["counts"]["live_rows"] = int(len(df)); meta["counts"]["pools"] = int(df["pool"].nunique()) if not df.empty else 0
    if extra: meta.update(extra)
    Path(path).write_text(json.dumps(meta, indent=2)); print(f"[ok] RUN_META.json → {path}"); return path

In [58]:
def main_demo_next_steps(webhook_url: str | None = None, use_rag: bool = False):
    update_events_from_sources()
    enrich_events_and_aggregate()
    write_run_meta()
    if webhook_url:
        try:
            trigger_alerts_if_needed(webhook_url)
        except Exception as e:
            print(f"[alert] skipped: {e}")

In [59]:
import os

In [60]:
os.environ["HF_TOKEN"] = "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"
print("[secrets] HF_TOKEN set for this session.")

[secrets] HF_TOKEN set for this session.


In [61]:
from huggingface_hub import login
login(token=os.environ["HF_TOKEN"], add_to_git_credential=False)
print("[secrets] Hugging Face client logged in.")

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


[secrets] Hugging Face client logged in.


In [62]:
HF_TOKEN = os.getenv("HF_TOKEN") 
if HF_TOKEN:
    print("[secrets] HF_TOKEN loaded from environment.")
else:
    print("[secrets] HF_TOKEN not set (features that need HF may be disabled).")

[secrets] HF_TOKEN loaded from environment.


In [63]:
from typing import Any

In [64]:
def rpc_health_check(rpc: str) -> bool:
    if not (_WEB3_OK and rpc):
        return False
    try:
        w3 = Web3(Web3.HTTPProvider(rpc, request_kwargs={"timeout": 6}))
        ok = w3.is_connected() if hasattr(w3, "is_connected") else w3.isConnected()
        return bool(ok)
    except Exception:
        return False

In [65]:
class _ConfigProxy:
    """Guards ETH RPC config after init so you can't flip real/mock mid-run by accident."""
    def __init__(self, cfg_obj: Any):
        object.__setattr__(self, "_cfg", cfg_obj)
        object.__setattr__(self, "_locked", False)
    def lock(self):  object.__setattr__(self, "_locked", True)
    def unlock(self): object.__setattr__(self, "_locked", False)
    def __getattr__(self, k): return getattr(object.__getattribute__(self, "_cfg"), k)
    def __setattr__(self, k, v):
        if object.__getattribute__(self, "_locked") and k in ("eth_rpc", "mock_mode"):
            raise RuntimeError(f"[rpc] Config is locked. Refusing to set {k}. Restart kernel or call CFG.unlock().")
        setattr(object.__getattribute__(self, "_cfg"), k, v)

In [66]:
if not isinstance(CFG, _ConfigProxy):
    CFG = _ConfigProxy(CFG)
_RPC_INIT_DONE = False

In [67]:
def init_onchain_locked():
    """Initialize OnChainTool once, with health check, then lock RPC config."""
    global onchain, _RPC_INIT_DONE
    if _RPC_INIT_DONE:
        print("[rpc] Already initialized; not switching modes.")
        return onchain
    env_rpc = os.getenv("ETH_RPC")
    if env_rpc and "<YOUR_KEY>" not in env_rpc and rpc_health_check(env_rpc):
        CFG.eth_rpc = env_rpc
        CFG.mock_mode = False
        print(f"[rpc] Using real RPC: {CFG.eth_rpc}")
    else:
        CFG.mock_mode = True
        print("[rpc] Real RPC not available/healthy → mock mode.")
    onchain = OnChainTool(CFG.eth_rpc, mock=CFG.mock_mode)
    CFG.lock()
    _RPC_INIT_DONE = True
    return onchain

In [68]:
onchain = init_onchain_locked()

[rpc] Real RPC not available/healthy → mock mode.
[onchain] mock_mode enabled


In [69]:
def _define_once(flag_name: str) -> bool:
    """Return True if we should define/patch now; False if already defined."""
    if globals().get(flag_name, False):
        print(f"[guard] {flag_name} already set — skipping redefinition.")
        return False
    globals()[flag_name] = True
    return True

In [70]:
if _define_once("_CUSUM_SCORE_DEFINED"):
    def _cusum_score(dev: pd.Series, w: int = 30, k: float = 0.0) -> pd.Series:
        s = dev.fillna(0).astype(float).to_numpy()
        g_pos = np.zeros_like(s, dtype=float); g_neg = np.zeros_like(s, dtype=float)
        for i in range(1, len(s)):
            g_pos[i] = max(0.0, g_pos[i-1] + (s[i] - k))
            g_neg[i] = min(0.0, g_neg[i-1] + (s[i] + k))
        mag = np.abs(g_pos) + np.abs(g_neg)
        out = pd.Series(mag, index=dev.index)
        out_rolled = out.rolling(w, min_periods=5).apply(
            lambda z: z[-1] / (np.nanmax(np.abs(z)) + 1e-9),
            raw=True
        )
        return out_rolled.fillna(0.0)

In [71]:
if _define_once("_GET_UNIV3_PRICE_PATCHED"):
    def _get_univ3_price_precise(self, pool_addr: str) -> OnchainResult:
        ts = _now_iso()
        if self.mock or not pool_addr or pool_addr.lower().startswith("0x0000"):
            base = 1.0 + np.random.normal(0, 3e-4)
            return OnchainResult(True, ts, None, {"price": float(base), "sqrtPriceX96": None, "source": "mock"})
        try:
            _ABI_UNIV3_POOL = [
                {"name":"slot0","inputs":[],"outputs":[
                    {"type":"uint160","name":"sqrtPriceX96"},
                    {"type":"int24","name":"tick"},
                    {"type":"uint16","name":"observationIndex"},
                    {"type":"uint16","name":"observationCardinality"},
                    {"type":"uint16","name":"observationCardinalityNext"},
                    {"type":"uint8","name":"feeProtocol"},
                    {"type":"bool","name":"unlocked"}],
                 "stateMutability":"view","type":"function"},
                {"name":"token0","inputs":[],"outputs":[{"type":"address"}], "stateMutability":"view","type":"function"},
                {"name":"token1","inputs":[],"outputs":[{"type":"address"}], "stateMutability":"view","type":"function"},
            ]
            _ABI_ERC20_DEC = [
                {"name":"decimals","inputs":[],"outputs":[{"type":"uint8"}], "stateMutability":"view","type":"function"},
                {"name":"symbol","inputs":[],"outputs":[{"type":"string"}],   "stateMutability":"view","type":"function"},
            ]
            pool = Web3.to_checksum_address(pool_addr)
            c = self.w3.eth.contract(address=pool, abi=_ABI_UNIV3_POOL)
            slot0 = self._call(lambda: c.functions.slot0().call()); sqrtp = slot0[0]
            t0 = self._call(lambda: c.functions.token0().call()); t1 = self._call(lambda: c.functions.token1().call())
            t0c = self.w3.eth.contract(address=t0, abi=_ABI_ERC20_DEC)
            t1c = self.w3.eth.contract(address=t1, abi=_ABI_ERC20_DEC)
            d0 = int(self._call(lambda: t0c.functions.decimals().call()))
            d1 = int(self._call(lambda: t1c.functions.decimals().call()))
            p_raw = (sqrtp / (2**96))**2
            price = float(p_raw * (10 ** (d0 - d1)))
            block = self.w3.eth.block_number
            return OnchainResult(True, ts, block, {
                "price": price, "sqrtPriceX96": int(sqrtp),
                "token0": t0, "token1": t1, "decimals0": d0, "decimals1": d1, "source": "uniswap_v3"
            })
        except Exception as e:
            return OnchainResult(False, ts, None, {}, str(e))
    OnChainTool.get_univ3_price = _get_univ3_price_precise

In [72]:
if _define_once("_MAIN_DEMO_DEFINED"):
    def main_demo():
        print(f"[env] Output base: {OUT}")
        print(f"[chain] mock_mode={CFG.mock_mode} rpc={CFG.eth_rpc}")
        sample_once()
        run_anomaly_zoo_update_live()
        df = load_live()
        if "y_10m" not in df.columns or df["y_10m"].nunique() < 2:
            df["y_10m"] = _ensure_labels(df); write_live(df)
        if _XGB_OK and _SK_OK:
            try:
                train_forecaster_10m()
                score_latest_10m()
                explain_forecast_10m()
            except Exception as e:
                print(f"[note] forecaster step skipped: {e}")
        else:
            print("[note] skipping forecaster (xgboost or sklearn missing)")
        compute_network_features()
        enrich_events_and_aggregate()
        try:
            dec = decide_latest(12); print(dec.tail(3))
        except Exception as e:
            print(f"[note] decide_latest skipped: {e}")
        print(build_analyst_note_v2())
        print(nightly_report())

In [73]:
if _define_once("_MAIN_DEMO_NEXT_DEFINED"):
    def main_demo_next_steps(webhook_url: str | None = None, use_rag: bool = False):
        update_events_from_sources()
        enrich_events_and_aggregate()
        write_run_meta()
        if webhook_url:
            try:
                trigger_alerts_if_needed(webhook_url)
            except Exception as e:
                print(f"[alert] skipped: {e}")

In [74]:
if _define_once("_SEVERITYMODEL_FIT_PATCHED"):
    def _sev_fit_patch(self, texts, labels):
        if not texts:
            warnings.warn("[warn] fit called with empty texts; skipping."); return self
        if len(texts) != len(labels):
            raise ValueError("texts and labels must have the same length")
        y = np.asarray(labels, dtype=int)
        if getattr(self, "emb", None) is None:
            p = float(np.mean(y)) if y.size else 0.0
            self.model = {"prior": max(0.0, min(1.0, p))}
            warnings.warn("[note] sentence-transformers missing; using prior-only severity."); return self
        uniq = np.unique(y)
        if not getattr(self, "_sk_ok", False) or uniq.size < 2:
            p = float(np.mean(y)) if y.size else 0.0
            self.model = {"prior": max(0.0, min(1.0, p))}
            if not getattr(self, "_sk_ok", False):
                warnings.warn("[note] sklearn missing; using prior-only severity.")
            elif uniq.size < 2:
                warnings.warn("[note] one-class labels; using prior-only severity.")
            return self
        X = self._embed(texts)
        try:
            clf = self._LR(max_iter=200); clf.fit(X, y); self.model = clf
        except Exception as e:
            p = float(np.mean(y))
            self.model = {"prior": max(0.0, min(1.0, p))}
            warnings.warn(f"[note] LR fit failed: {e}. Using prior-only severity.")
        return self
    SeverityModel.fit = _sev_fit_patch

In [75]:
try:
    _define_once
except NameError:
    def _define_once(flag_name: str) -> bool:
        if globals().get(flag_name, False):
            print(f"[guard] {flag_name} already set — skipping.")
            return False
        globals()[flag_name] = True
        return True

In [76]:
if _define_once("_STORAGE_HELPERS_DEFINED"):
    import os
    import pandas as pd
    import numpy as np
    from pathlib import Path
    from datetime import datetime, timezone
    OUT = Path(globals().get("OUT", Path.cwd() / "outputs"))
    OUT.mkdir(parents=True, exist_ok=True)
    LIVE_CSV = globals().get("LIVE_CSV", OUT / "live_dataset.csv")
    CANON_COLS = globals().get("CANON_COLS", [
        "ts","pool","dex_spot","dex_twap","oracle_ratio","dev","dev_roll_std",
        "tvl_outflow_rate","virtual_price","spot_twap_gap_bps","r0","r1","r0_delta","r1_delta",
        "event_severity_max_24h","event_count_24h","feeds_fresh","run_quality_pass","model_version","block",
        "z_if","z_lof","z_ocsvm","z_cusum","z_ae","anom_fused",
        "y_10m",
        "neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best",
    ])
    def _now_iso() -> str:
        return datetime.now(timezone.utc).isoformat(timespec="seconds")
    def _iso_str(s) -> str:
        ts = pd.to_datetime(s, errors="coerce", utc=True)
        if isinstance(ts, pd.Series):
            return ts.dt.strftime("%Y-%m-%d %H:%M:%S%z").fillna("")
        return ts.strftime("%Y-%m-%d %H:%M:%S%z") if not pd.isna(ts) else ""
    def ensure_live_schema(df: pd.DataFrame) -> pd.DataFrame:
        df = df.copy()
        for col in CANON_COLS:
            if col not in df.columns:
                df[col] = np.nan
        return df[CANON_COLS]
    def write_live(df: pd.DataFrame) -> Path:
        df2 = ensure_live_schema(df)
        if "ts" in df2.columns:
            df2["ts"] = _iso_str(df2["ts"])
        LIVE_CSV.parent.mkdir(parents=True, exist_ok=True)
        df2.to_csv(LIVE_CSV, index=False)
        return LIVE_CSV
    def load_live(n: int | None = None) -> pd.DataFrame:
        if not LIVE_CSV.exists():
            return pd.DataFrame(columns=CANON_COLS)
        df = pd.read_csv(LIVE_CSV, dtype=str)
        num_cols = [c for c in CANON_COLS if c not in ("ts","pool","model_version","block")]
        for c in num_cols:
            if c in df.columns:
                df[c] = pd.to_numeric(df[c], errors="coerce")
        if "ts" in df.columns:
            df["ts"] = pd.to_datetime(df["ts"], utc=True, errors="coerce")
        df = ensure_live_schema(df)
        if n:
            df = df.tail(n)
        return df
    def append_live(rows: pd.DataFrame) -> Path:
        base = load_live()
        merged = pd.concat([base, rows], ignore_index=True)
        write_live(merged)
        print(f"[append] {len(rows)} rows → {LIVE_CSV}")
        return LIVE_CSV
    print(f"[io] LIVE_CSV = {LIVE_CSV}")

[io] LIVE_CSV = C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv


In [77]:
import numpy as np
import pandas as pd
from collections import deque
if _define_once("_NETWORK_FEATURES_BLOCK_DEFINED"):
    _NET_WIN = globals().get("_NET_WIN", 60)
    _hist_dev  = globals().get("_hist_dev", {})
    _hist_anom = globals().get("_hist_anom", {})
    def _push_hist(d: dict, key: str, val: float, win: int = _NET_WIN):
        q = d.get(key)
        if q is None:
            q = deque(maxlen=win)
            d[key] = q
        q.append(float(val) if pd.notna(val) else 0.0)
    def _rolling_corr(x: np.ndarray, y: np.ndarray) -> float:
        if len(x) < 3 or len(y) < 3 or len(x) != len(y):
            return np.nan
        x = np.asarray(x); y = np.asarray(y)
        if x.std() == 0 or y.std() == 0:
            return 0.0
        return float(np.corrcoef(x, y)[0,1])
    def _cross_corr_lag(x: np.ndarray, y: np.ndarray, max_lag: int = 10) -> tuple[float,int]:
        if len(x) < 5 or len(y) < 5:
            return np.nan, 0
        x = np.asarray(x); y = np.asarray(y)
        best, best_lag = -2.0, 0
        for lag in range(-max_lag, max_lag+1):
            if lag < 0:
                xs, ys = x[:lag], y[-lag:]
            elif lag > 0:
                xs, ys = x[lag:], y[:-lag]
            else:
                xs, ys = x, y
            if len(xs) < 5:
                continue
            c = _rolling_corr(xs, ys)
            if np.isnan(c): 
                continue
            if c > best:
                best, best_lag = c, lag
        return float(best), int(best_lag)
    def compute_network_features() -> pd.DataFrame:
        """Compute neighbor_max_dev / neighbor_avg_anom and best lead/lag correlation per pool.
           Updates the last rows in live CSV and returns a small summary frame.
        """
        if "load_live" not in globals() or "write_live" not in globals():
            raise RuntimeError("load_live/write_live not found — run the storage helpers patch first.")
        df = load_live().copy()
        if df.empty:
            print("[network] live is empty; skipping")
            return pd.DataFrame(columns=["ts","pool","neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"])
        for col in ("pool","dev","anom_fused","ts"):
            if col not in df.columns:
                df[col] = np.nan
        df = df.sort_values(["ts"])
        for _, r in df.iterrows():
            _push_hist(_hist_dev,  str(r["pool"]), float(r.get("dev", 0.0)))
            _push_hist(_hist_anom, str(r["pool"]), float(r.get("anom_fused", 0.0)))
        pools = list(df["pool"].dropna().unique())
        rows = []
        for p in pools:
            dev_p = list(_hist_dev.get(p, []))
            neigh = [q for q in pools if q != p]
            neigh_max_dev  = 0.0
            neigh_avg_anom = 0.0
            lead_lag_best  = 0
            corr_best      = 0.0
            for q in neigh:
                dev_q = list(_hist_dev.get(q, []))
                an_q  = list(_hist_anom.get(q, []))
                if dev_q:
                    neigh_max_dev = max(neigh_max_dev, float(np.nanmax(np.abs(dev_q))))
                if an_q:
                    neigh_avg_anom += float(np.nanmean(an_q))
                if dev_p and dev_q:
                    c, lag = _cross_corr_lag(np.array(dev_p), np.array(dev_q), max_lag=10)
                    if np.isfinite(c) and abs(c) > abs(corr_best):
                        corr_best, lead_lag_best = c, lag
            if len(neigh) > 0:
                neigh_avg_anom /= len(neigh)
            rows.append({
                "pool": p,
                "neighbor_max_dev": float(neigh_max_dev),
                "neighbor_avg_anom": float(neigh_avg_anom),
                "lead_lag_best": int(lead_lag_best),
                "corr_best": float(corr_best),
            })
        net = pd.DataFrame(rows)
        latest = df.groupby("pool", as_index=False).tail(1).copy()
        for col, default in [
            ("neighbor_max_dev", np.nan),
            ("neighbor_avg_anom", np.nan),
            ("lead_lag_best", 0),
            ("corr_best", np.nan),
        ]:
            if col not in latest.columns:
                latest[col] = default
        if not net.empty:
            latest = latest.drop(columns=["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"], errors="ignore")
            latest = latest.merge(net, on="pool", how="left")
            latest["neighbor_max_dev"]  = latest["neighbor_max_dev"].astype(float)
            latest["neighbor_avg_anom"] = latest["neighbor_avg_anom"].astype(float)
            latest["lead_lag_best"]     = latest["lead_lag_best"].fillna(0).astype(int)
            latest["corr_best"]         = latest["corr_best"].astype(float)
        live = load_live()
        live["ts_str"]   = _iso_str(live["ts"])
        latest["ts_str"] = _iso_str(latest["ts"])
        live_idx = live.set_index(["pool","ts_str"])
        upd_idx  = latest.set_index(["pool","ts_str"])[["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]]
        live_idx.update(upd_idx)
        live_updated = live_idx.reset_index().drop(columns=["ts_str"])
        write_live(live_updated)
        print("[network] updated neighbor features & corr/lag on last rows")
        return latest[["ts","pool","neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]].sort_values("pool")

In [78]:
import numpy as np
import pandas as pd

In [79]:
if _define_once("_EXPLAIN_FORECAST_PATCHED"):
    def explain_forecast_10m(feature_cols: list | None = None, n_repeats: int = 8) -> dict:
        """Permutation-importance on the 10m forecaster; robust to NaNs & one-class edge cases."""
        if not globals().get("_SK_OK", False):
            payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
            EXPLAIN_JSON.write_text(json.dumps(payload, indent=2))
            print(f"[explain] wrote {EXPLAIN_JSON}")
            return payload
        df = load_live()
        if df.empty:
            raise ValueError("live_dataset is empty.")
        if "y_10m" not in df.columns or df["y_10m"].nunique(dropna=True) < 2:
            df["y_10m"] = _ensure_labels(df)
            write_live(df)
        if feature_cols is None:
            feature_cols = [
                "dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps",
                "oracle_ratio","anom_fused","r0_delta","r1_delta",
                "event_severity_max_24h","event_count_24h"
            ]
        use_cols = [c for c in feature_cols if c in df.columns]
        X = df[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
        y = pd.to_numeric(df["y_10m"], errors="coerce").fillna(0).astype(int).to_numpy()
        tr_idx, te_idx = _time_split_idx(df["ts"], 0.70)
        if te_idx.size == 0:
            payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
            EXPLAIN_JSON.write_text(json.dumps(payload, indent=2))
            print("[explain] not enough test data; wrote empty payload.")
            return payload
        Xte, yte = X[te_idx], y[te_idx]
        clf, calib = _load_forecaster()
        if clf is None and calib is None:
            _ = train_forecaster_10m(feature_cols=use_cols)
            clf, calib = _load_forecaster()
        model = calib if calib is not None else clf
        if len(np.unique(yte)) < 2:
            payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
            EXPLAIN_JSON.write_text(json.dumps(payload, indent=2))
            print("[explain] one-class test labels; wrote empty payload.")
            return payload
        from sklearn.inspection import permutation_importance
        r = permutation_importance(model, Xte, yte, n_repeats=n_repeats, scoring="average_precision", random_state=42)
        imp = sorted(zip(use_cols, r.importances_mean, r.importances_std), key=lambda z: z[1], reverse=True)
        top3 = [f"{name} (+{mean:.4f}±{std:.4f} AP)" for name, mean, std in imp[:3]]
        payload = {"ts": _now_iso(), "top_contributors": top3, "all_features": [{"feature": n, "mean": float(m), "std": float(s)} for n,m,s in imp]}
        EXPLAIN_JSON.write_text(json.dumps(payload, indent=2))
        print(f"[explain] wrote {EXPLAIN_JSON}")
        return payload

In [80]:
print("[run] sample → zoo → train/score → explain → reports")
onchain = init_onchain_locked()  
main_demo()
main_demo_next_steps(webhook_url=None, use_rag=False)

[run] sample → zoo → train/score → explain → reports
[rpc] Already initialized; not switching modes.
[env] Output base: C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel
[chain] mock_mode=True rpc=https://cloudflare-eth.com
[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv
[zoo] anomaly scores updated
[note] forecaster step skipped: Invalid classes inferred from unique values of `y`.  Expected: [0], got [1]


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[network] updated neighbor features & corr/lag on last rows


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[enrich] wrote event aggregates to live tail rows
[note] decide_latest skipped: name 'decide_latest' is not defined


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[network] updated neighbor features & corr/lag on last rows
[ok] Analyst Note exported → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\analyst_note.pdf
{'ok': True, 'note': 'Fused anomaly now=1.00; 10-min risk=0.62. Confidence High. Top drivers: dev_roll_std (+0.0654±0.0167 AP); oracle_ratio (+0.0201±0.0102 AP); r1_delta (+0.0012±0.0012 AP) Propagation: DAI/USDC_univ3 leads peers (corr=0.96). Action: monitor in 5m; if risk rises above 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).', 'pdf': 'C:\\Users\\aniru\\OneDrive\\Desktop\\ML tutorial\\DEFI Depeg sentinel\\analyst_note.pdf'}
[report] decide_latest failed: name 'decide_latest' is not defined
[ok] Markdown PDF exported → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\report.pdf
{'ok': True, 'markdown': '# Nightly Model Report\nGenerated: 2025-08-28T20:49:17+00:00\n\n## Winner Detector Today\n- **Winner:** `z_if`  (by AP proxy on |dev|≥0.3%)\n\nAP scores:\n- z_if: 0.778\n- 

  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


In [81]:
FORECAST_30M_PATH = OUT_MODEL / "forecast_30m_xgb.joblib"
CALIB_30M_PATH    = OUT_MODEL / "forecast_30m_calib.joblib"
FORECAST_30M_PARQUET = OUT / "forecast_30m.parquet"

In [82]:
if "y_30m" not in CANON_COLS:
    CANON_COLS.insert(CANON_COLS.index("y_10m")+1, "y_30m")

In [83]:
def main_demo():
    print(f"[env] Output base: {OUT}")
    print(f"[chain] mock_mode={CFG.mock_mode} rpc={CFG.eth_rpc}")
    rollover_if_needed()
    sample_once()
    run_anomaly_zoo_update_live()
    df = load_live()
    df = ensure_targets_all(df) 
    if _XGB_OK and _SK_OK:
        try:
            train_forecaster_10m()
            score_latest_10m()
            explain_forecast_10m()
            train_forecaster_30m()
            score_latest_30m()
        except Exception as e:
            print(f"[note] forecaster step skipped: {e}")
    else:
        print("[note] skipping forecaster (xgboost or sklearn missing)")
    compute_network_features()
    enrich_events_and_aggregate()
    try:
        dec = decide_latest(12)
        print(dec.tail(3))
    except Exception as e:
        print(f"[note] decide_latest skipped: {e}")
    print(build_analyst_note_v2())
    print(nightly_report())

In [84]:
def rotate_live_to_archive(today_utc: Optional[pd.Timestamp] = None) -> dict:
    """
    Move rows with ts date < today_utc to archive/live_YYYYMMDD.csv (append).
    Keep only today's rows in LIVE_CSV.
    """
    if not LIVE_CSV.exists():
        return {"moved": 0, "kept": 0, "files": []}
    try:
        df = load_live()  
    except Exception as e:
        print(f"[archive] load failed: {e}")
        return {"moved": 0, "kept": 0, "files": []}
    if df.empty or "ts" not in df.columns:
        return {"moved": 0, "kept": 0, "files": []}
    today = (today_utc or pd.Timestamp.utcnow()).normalize()
    dts = df["ts"].dt.normalize()
    old_mask = dts < today
    if not old_mask.any():
        return {"moved": 0, "kept": int((~old_mask).sum()), "files": []}
    moved = int(old_mask.sum())
    kept = int((~old_mask).sum())
    files = []
    df_old = df.loc[old_mask].copy()
    df_old["date"] = df_old["ts"].dt.strftime("%Y%m%d")
    for d, chunk in df_old.groupby("date"):
        path = OUT_ARCHIVE / f"live_{d}.csv"
        header = not path.exists()
        ensure_live_schema(chunk.drop(columns=["date"], errors="ignore")).to_csv(path, index=False, header=header, mode="a", lineterminator="\n")
        files.append(str(path))
    df_new = df.loc[~old_mask].copy()
    write_live(df_new)
    print(f"[archive] moved={moved} kept={kept} files={len(files)}")
    return {"moved": moved, "kept": kept, "files": files}
def rollover_if_needed():
    try:
        return rotate_live_to_archive()
    except Exception as e:
        print(f"[archive] rollover skipped: {e}")
        return {"moved": 0, "kept": 0, "files": []}

In [85]:
def ensure_targets_all(df: pd.DataFrame | None = None, base_thr: float = 0.005) -> pd.DataFrame:
    df = load_live() if df is None else df.copy()
    if df.empty:
        return df
    df = df.sort_values(["pool","ts"]).reset_index(drop=True)
    if "y_10m" not in df.columns or df["y_10m"].isna().any() or df["y_10m"].nunique(dropna=True) < 2:
        df["y_10m"] = _ensure_labels(df, base_thr=base_thr)
    if "y_30m" not in df.columns or df["y_30m"].isna().any():
        df["y_30m"] = label_targets(
            df, horizon=30, dev_thr=base_thr, fused_col="anom_fused" if "anom_fused" in df.columns else None
        ).astype(int)
    return df

In [86]:
df = load_live()
df = ensure_targets_all(df)

  cond_dev = np.nanmax(g_dev[j1:j2])
  cond_dev = np.nanmax(g_dev[j1:j2])


In [87]:
def _load_forecaster_30m():
    clf = load(FORECAST_30M_PATH) if FORECAST_30M_PATH.exists() else None
    calib = load(CALIB_30M_PATH) if CALIB_30M_PATH.exists() else None
    return clf, calib

In [88]:
def train_forecaster_30m(feature_cols: List[str] | None = None, label_col: str = "y_30m"):
    if not _XGB_OK or not _SK_OK: raise RuntimeError("xgboost + scikit-learn required to train 30m forecaster.")
    df = load_live().copy()
    if df.empty: raise ValueError("live_dataset is empty. sample more rows first.")
    df = ensure_targets_all(df)
    if feature_cols is None:
        feature_cols = ["dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio",
                        "anom_fused","r0_delta","r1_delta","event_severity_max_24h","event_count_24h"]
    use_cols = [c for c in feature_cols if c in df.columns]
    X = df[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    y = pd.to_numeric(df[label_col], errors="coerce").fillna(0).astype(int).to_numpy()
    ts = df["ts"]; tr_idx, te_idx = _time_split_idx(ts, 0.70)
    Xtr, Xte = X[tr_idx], X[te_idx]; ytr, yte = y[tr_idx], y[te_idx]
    clf = xgb.XGBClassifier(n_estimators=300, max_depth=4, learning_rate=0.06,
                            subsample=0.9, colsample_bytree=0.8,
                            objective="binary:logistic", eval_metric="logloss",
                            random_state=SEED, n_jobs=0)
    clf.fit(Xtr, ytr)
    calib = None
    unique, counts = np.unique(ytr, return_counts=True)
    min_cls = min(counts) if len(counts)==2 else 0
    if min_cls >= 3:
        try: calib = CalibratedClassifierCV(clf, method="isotonic", cv=3).fit(Xtr, ytr)
        except Exception: calib = None
    if calib is None and min_cls >= 2:
        try: calib = CalibratedClassifierCV(clf, method="sigmoid", cv=2).fit(Xtr, ytr)
        except Exception: calib = None
    def _proba(model, X_): return (model or clf).predict_proba(X_)[:,1]
    ap, bs = float("nan"), float("nan")
    if len(np.unique(yte))>1:
        try: ap = average_precision_score(yte, _proba(calib, Xte))
        except Exception: pass
        try: bs = float(brier_score_loss(yte, _proba(calib, Xte)))
        except Exception: bs = float("nan")
    print(f"[forecast30m] AP={ap if pd.notna(ap) else float('nan'):.3f}  Brier={bs if pd.notna(bs) else float('nan'):.3f}  n_te={len(yte)}  calib={'iso3' if (calib and getattr(calib,'method','')=='isotonic') else ('sig2' if calib else 'none')}")
    dump(clf, FORECAST_30M_PATH)
    if calib: dump(calib, CALIB_30M_PATH)
    elif CALIB_30M_PATH.exists(): CALIB_30M_PATH.unlink()
    tail = df.tail(6).copy()
    X_tail = tail[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    tail["risk_forecast_30m"] = _proba(calib, X_tail)
    return tail[["ts","pool","anom_fused","risk_forecast_30m"]]

In [89]:
def score_latest_30m(n_tail: int = 60, feature_cols: Sequence[str] | None = None, write_parquet: bool = True) -> pd.DataFrame:
    df = load_live()
    if df.empty: raise ValueError("live_dataset is empty. Run sample_once() first.")
    if feature_cols is None:
        feature_cols = ["dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio",
                        "anom_fused","r0_delta","r1_delta","event_severity_max_24h","event_count_24h"]
    use_cols = [c for c in feature_cols if c in df.columns]
    X_tail = df.tail(n_tail)[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    clf, calib = _load_forecaster_30m()
    if clf is None and calib is None:
        _ = train_forecaster_30m(feature_cols=list(use_cols))
        clf, calib = _load_forecaster_30m()
    model = calib if calib is not None else clf
    p = model.predict_proba(X_tail)[:,1]
    out = df.tail(n_tail).copy(); out["risk_forecast_30m"] = p
    out = out[["ts","pool","anom_fused","risk_forecast_30m"]]
    if write_parquet:
        try:
            out.to_parquet(FORECAST_30M_PARQUET, index=False); print(f"[forecast30m] wrote {FORECAST_30M_PARQUET}")
        except Exception as e:
            print(f"[warn] parquet write failed: {e}")
    return out.tail(10)

In [90]:
if _XGB_OK and _SK_OK:
    try:
        train_forecaster_10m(); score_latest_10m(); explain_forecast_10m()
        train_forecaster_30m(); score_latest_30m()
    except Exception as e:
        print(f"[note] forecaster step skipped: {e}")

[note] forecaster step skipped: Invalid classes inferred from unique values of `y`.  Expected: [0], got [1]


In [91]:
try:
    scored30 = score_latest_30m(n_tail=len(tail), write_parquet=False)
    if not scored30.empty:
        rmax30 = scored30.sort_values("risk_forecast_30m", ascending=False).iloc[0]
        p30 = float(rmax30.get("risk_forecast_30m", 0.0))
    else:
        p30 = 0.0
except Exception:
    p30 = 0.0

In [92]:
def build_analyst_note_v2() -> dict:
    """Build a short analyst note + export PDF. No NameErrors, even if 30m model is missing."""
    import json
    import numpy as np
    import pandas as pd
    tail = load_live().sort_values("ts").groupby("pool").tail(1)
    if tail.empty:
        return {"ok": False, "reason": "no data"}
    top3 = []
    try:
        if EXPLAIN_JSON.exists():
            explain = json.loads(EXPLAIN_JSON.read_text())
        else:
            explain = explain_forecast_10m()
        top3 = (explain or {}).get("top_contributors", [])[:3]
    except Exception:
        pass
    cue = None
    try:
        net = compute_network_features()
        if isinstance(net, pd.DataFrame) and not net.empty:
            nrow = net.sort_values("neighbor_avg_anom", ascending=False).iloc[0]
            lead_str = "leads" if nrow["lead_lag_best"] > 0 else ("lags" if nrow["lead_lag_best"] < 0 else "co-moves")
            cue = f"{nrow['pool']} {lead_str} peers (corr={float(nrow['corr_best']):.2f})."
    except Exception:
        pass
    risk_now = 0.0  
    p10 = 0.0      
    p30 = 0.0       
    try:
        n_tail = max(1, len(tail))
        scored10 = score_latest_10m(n_tail=n_tail, write_parquet=False)
        if isinstance(scored10, pd.DataFrame) and not scored10.empty:
            rmax10 = scored10.sort_values("risk_forecast_10m", ascending=False).iloc[0]
            p10 = float(rmax10.get("risk_forecast_10m", 0.0))
            risk_now = float(rmax10.get("anom_fused", 0.0))
    except Exception:
        pass
    if not np.isfinite(risk_now) or risk_now == 0.0:
        try:
            risk_now = float(tail.get("anom_fused", pd.Series([0.0])).iloc[-1])
        except Exception:
            risk_now = 0.0
    try:
        if "score_latest_30m" in globals():
            n_tail = max(1, len(tail))
            scored30 = score_latest_30m(n_tail=n_tail, write_parquet=False)
            if isinstance(scored30, pd.DataFrame) and not scored30.empty:
                rmax30 = scored30.sort_values("risk_forecast_30m", ascending=False).iloc[0]
                p30 = float(rmax30.get("risk_forecast_30m", 0.0))
    except Exception:
        p30 = 0.0
    band = (
        "High" if (p10 >= 0.60 or p30 >= 0.60 or risk_now >= 0.90)
        else ("Medium" if (p10 >= 0.35 or p30 >= 0.50 or risk_now >= 0.70) else "Low")
    )
    try:
        freshness = "Fresh" if bool(tail["feeds_fresh"].iloc[-1]) else "Stale"
    except Exception:
        freshness = "Unknown"
    note_lines = [
        f"Fused anomaly now={risk_now:.2f}; 10-min risk={p10:.2f}; 30-min risk={p30:.2f}. Confidence {band}."
    ]
    if top3:
        note_lines.append("Top drivers: " + "; ".join(top3))
    if cue:
        note_lines.append("Propagation: " + cue)
    note_lines.append(
        "Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute)."
    )
    note = " ".join(note_lines)[:900]
    try:
        pdf_path = export_analyst_note_pdf(
            note_text=note,
            risk_now=risk_now,
            risk_10m=p10,
            risk_30m=p30,
            contributors=top3,
            freshness=freshness,
            confidence=band,
            out_path=OUT / "analyst_note.pdf",
            title="Analyst Note v2"
        )
    except Exception:
        pdf_path = OUT / "analyst_note.txt"
        try:
            pdf_path.write_text(note)
        except Exception:
            pass
    return {"ok": True, "note": note, "pdf": str(pdf_path)}

In [93]:
res = build_analyst_note_v2()
print(res)

  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[network] updated neighbor features & corr/lag on last rows
[ok] Analyst Note exported → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\analyst_note.pdf
{'ok': True, 'note': 'Fused anomaly now=1.00; 10-min risk=0.62; 30-min risk=0.86. Confidence High. Top drivers: dev_roll_std (+0.0654±0.0167 AP); oracle_ratio (+0.0201±0.0102 AP); r1_delta (+0.0012±0.0012 AP) Propagation: DAI/USDC_univ3 leads peers (corr=0.96). Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).', 'pdf': 'C:\\Users\\aniru\\OneDrive\\Desktop\\ML tutorial\\DEFI Depeg sentinel\\analyst_note.pdf'}


In [94]:
import json, numpy as np, pandas as pd

In [95]:
tail = load_live().sort_values("ts").groupby("pool").tail(1)

In [96]:
top3 = []
try:
    if EXPLAIN_JSON.exists():
        explain = json.loads(EXPLAIN_JSON.read_text())
    else:
        explain = explain_forecast_10m()
    top3 = (explain or {}).get("top_contributors", [])[:3]
except Exception:
    pass

In [97]:
cue = None
try:
    net = compute_network_features()
    if isinstance(net, pd.DataFrame) and not net.empty:
        nrow = net.sort_values("neighbor_avg_anom", ascending=False).iloc[0]
        lead_str = "leads" if nrow["lead_lag_best"] > 0 else ("lags" if nrow["lead_lag_best"] < 0 else "co-moves")
        cue = f"{nrow['pool']} {lead_str} peers (corr={float(nrow['corr_best']):.2f})."
except Exception:
    pass

[network] updated neighbor features & corr/lag on last rows


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


In [98]:
risk_now = 0.0
p10 = 0.0
p30 = 0.0
try:
    n_tail = max(1, len(tail))
    scored10 = score_latest_10m(n_tail=n_tail, write_parquet=False)
    if isinstance(scored10, pd.DataFrame) and not scored10.empty:
        rmax10 = scored10.sort_values("risk_forecast_10m", ascending=False).iloc[0]
        p10 = float(rmax10.get("risk_forecast_10m", 0.0))
        risk_now = float(rmax10.get("anom_fused", 0.0))
except Exception:
    pass
if not np.isfinite(risk_now) or risk_now == 0.0:
    try:
        risk_now = float(tail.get("anom_fused", pd.Series([0.0])).iloc[-1])
    except Exception:
        risk_now = 0.0

In [99]:
try:
    if "score_latest_30m" in globals():
        scored30 = score_latest_30m(n_tail=max(1, len(tail)), write_parquet=False)
        if isinstance(scored30, pd.DataFrame) and not scored30.empty:
            rmax30 = scored30.sort_values("risk_forecast_30m", ascending=False).iloc[0]
            p30 = float(rmax30.get("risk_forecast_30m", 0.0))
except Exception:
    p30 = 0.0

In [100]:
band = (
    "High" if (p10 >= 0.60 or p30 >= 0.60 or risk_now >= 0.90)
    else ("Medium" if (p10 >= 0.35 or p30 >= 0.50 or risk_now >= 0.70) else "Low")
)
try:
    freshness = "Fresh" if bool(tail["feeds_fresh"].iloc[-1]) else "Stale"
except Exception:
    freshness = "Unknown"

In [101]:
note_lines = [f"Fused anomaly now={risk_now:.2f}; 10-min risk={p10:.2f}; 30-min risk={p30:.2f}. Confidence {band}."]
if top3:
    note_lines.append("Top drivers: " + "; ".join(top3))
if cue:
    note_lines.append("Propagation: " + cue)
note_lines.append("Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).")
note = " ".join(note_lines)[:900]
try:
    pdf_path = export_analyst_note_pdf(
        note_text=note,
        risk_now=risk_now,
        risk_10m=p10,
        risk_30m=p30,
        contributors=top3,
        freshness=freshness,
        confidence=band,
        out_path=OUT / "analyst_note.pdf",
        title="Analyst Note v2"
    )
except Exception:
    pdf_path = OUT / "analyst_note.txt"
    try:
        pdf_path.write_text(note)
    except Exception:
        pass
print({"ok": True, "note": note, "pdf": str(pdf_path)})

[ok] Analyst Note exported → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\analyst_note.pdf
{'ok': True, 'note': 'Fused anomaly now=1.00; 10-min risk=0.62; 30-min risk=0.86. Confidence High. Top drivers: dev_roll_std (+0.0654±0.0167 AP); oracle_ratio (+0.0201±0.0102 AP); r1_delta (+0.0012±0.0012 AP) Propagation: DAI/USDC_univ3 leads peers (corr=0.96). Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).', 'pdf': 'C:\\Users\\aniru\\OneDrive\\Desktop\\ML tutorial\\DEFI Depeg sentinel\\analyst_note.pdf'}


In [102]:
def _seq_windows_by_pool(df: pd.DataFrame, cols: list[str], win: int = 30):
    df = df.sort_values(["pool", "ts"]).copy()
    idx_map, windows = [], []
    for _, g in df.groupby("pool", sort=False):
        A = g[cols].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0).to_numpy()
        gi = g.index.to_list()
        for i in range(len(A)):
            idx_map.append(gi[i])
            windows.append(A[i+1-win:i+1] if i+1 >= win else None)
    return idx_map, windows, df.index

In [103]:
def _lstm_autoencoder_scores(df: pd.DataFrame, cols: list[str] = ["dev","dev_roll_std","tvl_outflow_rate"], win: int = 30) -> np.ndarray:
    if not _TORCH_OK:
        return np.zeros(len(df), dtype=float)
    counts = df.groupby("pool").size()
    min_hist = int(counts.min()) if not counts.empty else 0
    win_eff = max(8, min(win, min_hist))
    if win_eff < 8:
        return np.zeros(len(df), dtype=float)
    idx_map, windows, full_index = _seq_windows_by_pool(df, cols, win=win_eff)
    valid = [i for i,w in enumerate(windows) if w is not None]
    if not valid:
        return np.zeros(len(df), dtype=float)
    X = np.stack([windows[i] for i in valid]).astype(np.float32)  
    T, F = X.shape[1], X.shape[2]
    import torch, torch.nn as nn
    class LSTMAE(nn.Module):
        def __init__(self, fdim, h=16):
            super().__init__()
            self.enc = nn.LSTM(input_size=fdim, hidden_size=max(8, h), batch_first=True)
            self.dec = nn.LSTM(input_size=max(8, h), hidden_size=fdim, batch_first=True)
        def forward(self, x):
            z,_ = self.enc(x)
            y,_ = self.dec(z[:, -1:, :].repeat(1, x.size(1), 1))
            return y
    device = "cpu"
    model = LSTMAE(F, h=max(8, F*2)).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    crit = nn.MSELoss()
    xb = torch.from_numpy(X).to(device)
    model.train()
    for _ in range(6):
        opt.zero_grad(); recon = model(xb); loss = crit(recon, xb); loss.backward(); opt.step()
    model.eval()
    with torch.no_grad():
        recon = model(xb).cpu().numpy()
    err = ((recon - X)**2).mean(axis=(1,2))
    err = _scale_01(err)
    pos_map = {ix: j for j, ix in enumerate(full_index)}
    scores = np.zeros(len(full_index), dtype=float)
    for i_win, e in zip(valid, err):
        scores[pos_map[idx_map[i_win]]] = float(e)
    return scores

In [104]:
def compute_network_features_v2(win: int = 60) -> pd.DataFrame:
    df = load_live().copy()
    if df.empty:
        print("[network] live is empty; skipping")
        return pd.DataFrame(columns=["ts","pool","neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"])
    df = df.sort_values("ts")
    for _, r in df.iterrows():
        _push_hist(_hist_dev,  r["pool"], r.get("dev", 0.0), win)
        _push_hist(_hist_anom, r["pool"], r.get("anom_fused", 0.0), win)
    pools = list(df["pool"].dropna().unique())
    rows = []
    for p in pools:
        dev_p = list(_hist_dev.get(p, []))
        neigh = [q for q in pools if q != p]
        neigh_max_dev  = 0.0
        neigh_avg_anom = 0.0
        lead_lag_best  = 0
        corr_best      = 0.0
        used = 0
        for q in neigh:
            dev_q = list(_hist_dev.get(q, []))
            an_q  = list(_hist_anom.get(q, []))
            if dev_q:
                neigh_max_dev = max(neigh_max_dev, float(np.nanmax(np.abs(dev_q))))
            if an_q:
                neigh_avg_anom += float(np.nanmean(an_q)); used += 1
            if dev_p and dev_q:
                c, lag = _cross_corr_lag(np.array(dev_p), np.array(dev_q), max_lag=10)
                if np.isfinite(c) and abs(c) > abs(corr_best):
                    corr_best, lead_lag_best = float(c), int(lag)
        if used > 0:
            neigh_avg_anom /= used
        rows.append({
            "pool": p,
            "neighbor_max_dev": float(neigh_max_dev),
            "neighbor_avg_anom": float(neigh_avg_anom),
            "lead_lag_best": int(lead_lag_best),
            "corr_best": float(corr_best),
        })
    latest = df.groupby("pool", as_index=False).tail(1).copy()
    net = pd.DataFrame(rows)
    latest = latest.drop(columns=["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"], errors="ignore")
    latest = latest.merge(net, on="pool", how="left")
    latest["neighbor_max_dev"]  = latest["neighbor_max_dev"].fillna(0.0)
    latest["neighbor_avg_anom"] = latest["neighbor_avg_anom"].fillna(0.0)
    latest["lead_lag_best"]     = latest["lead_lag_best"].fillna(0).astype(int)
    latest["corr_best"]         = latest["corr_best"].fillna(0.0)
    live = load_live()
    live["ts_str"] = _iso_str(live["ts"]); latest["ts_str"] = _iso_str(latest["ts"])
    live_idx = live.set_index(["pool","ts_str"])
    upd_idx  = latest.set_index(["pool","ts_str"])[["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]]
    live_idx.update(upd_idx)
    live_updated = live_idx.reset_index().drop(columns=["ts_str"])
    write_live(live_updated)
    print("[network] v2 updated neighbor features (no NaNs)")
    return latest[["ts","pool","neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]].sort_values("pool")
compute_network_features = compute_network_features_v2

In [105]:
def fixup_coldstart_nans() -> pd.DataFrame:
    df = load_live()
    if df.empty:
        print("[fixup] no data")
        return df
    df = df.sort_values(["pool","ts"]).reset_index(drop=True)
    if "dev" in df.columns:
        roll = (df.groupby("pool")["dev"]
                  .rolling(20, min_periods=3).std()
                  .reset_index(level=0, drop=True))
        df["dev_roll_std"] = roll.fillna(0.0)
    for c, default in [
        ("neighbor_max_dev", 0.0),
        ("neighbor_avg_anom", 0.0),
        ("lead_lag_best", 0),
        ("corr_best", 0.0),
        ("z_ae_seq", 0.0),
    ]:
        if c in df.columns:
            df[c] = df[c].fillna(default)
    df = ensure_targets_all(df)
    write_live(df)
    print("[fixup] filled NaNs and ensured labels")
    return df.tail(12)

In [106]:
def run_anomaly_zoo_update_live_v2(features_cols: List[str] | None = None) -> pd.DataFrame:
    df = load_live()
    if df.empty or df["dev"].isna().all():
        print("[zoo] live dataset empty; sample first.")
        return df
    if not features_cols:
        features_cols = ["dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio"]
    use_cols = [c for c in features_cols if c in df.columns]
    if not use_cols:
        raise ValueError("No usable feature columns found in live dataset.")
    df_proc = df.copy()
    X = df_proc[use_cols].astype(float).replace([np.inf, -np.inf], np.nan).fillna(0.0).to_numpy()
    scaler = StandardScaler() if _SK_OK else None
    Xs = scaler.fit_transform(X) if scaler is not None else X
    z_if = np.zeros(len(df_proc))
    if _SK_OK:
        if_clf = IsolationForest(n_estimators=200, contamination="auto", random_state=SEED)
        z_if = _scale_01(-if_clf.fit(Xs).score_samples(Xs))
    z_lof = np.zeros(len(df_proc))
    if _SK_OK:
        try:
            lof2 = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(Xs)
            z_lof = _scale_01(-lof2.score_samples(Xs))
        except Exception:
            lof = LocalOutlierFactor(n_neighbors=20, novelty=False)
            z_lof = _scale_01((-lof.fit_predict(Xs)).astype(float))
    z_ocsvm = np.zeros(len(df_proc))
    if _SK_OK:
        try:
            oc = OneClassSVM(kernel="rbf", gamma="scale", nu=0.05)
            z_ocsvm = _scale_01(-oc.fit(Xs).decision_function(Xs))
        except Exception:
            z_ocsvm = np.zeros(len(df_proc))
    z_cusum = []
    for _, g in df_proc.sort_values(["pool","ts"]).groupby("pool", sort=False):
        z_cusum.append(_cusum_score(g["dev"]))
    z_cusum = pd.concat(z_cusum).reindex(df_proc.index).fillna(0.0).to_numpy()
    z_cusum = _scale_01(z_cusum)
    try:
        z_ae = _autoencoder_scores(df_proc, Xs)
    except Exception:
        z_ae = np.zeros(len(df_proc))
    try:
        z_ae_seq = _lstm_autoencoder_scores(df_proc, cols=["dev","dev_roll_std","tvl_outflow_rate"], win=30)
    except Exception:
        z_ae_seq = np.zeros(len(df_proc))
    fused = np.nanmax(np.vstack([z_if, z_lof, z_ocsvm, z_cusum, z_ae, z_ae_seq]), axis=0)
    df_proc["z_if"] = z_if
    df_proc["z_lof"] = z_lof
    df_proc["z_ocsvm"] = z_ocsvm
    df_proc["z_cusum"] = z_cusum
    df_proc["z_ae"] = z_ae
    df_proc["z_ae_seq"] = z_ae_seq
    df_proc["anom_fused"] = fused
    write_live(df_proc)
    print("[zoo] anomaly scores (incl. LSTM AE) updated")
    return df_proc.tail(10)

In [107]:
run_anomaly_zoo_update_live = run_anomaly_zoo_update_live_v2

In [108]:
sample_once()
run_anomaly_zoo_update_live() 

[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv
[zoo] anomaly scores (incl. LSTM AE) updated


Unnamed: 0,ts,pool,dex_spot,dex_twap,oracle_ratio,dev,dev_roll_std,tvl_outflow_rate,virtual_price,spot_twap_gap_bps,...,z_cusum,z_ae,anom_fused,y_10m,y_30m,neighbor_max_dev,neighbor_avg_anom,lead_lag_best,corr_best,z_ae_seq
2227,2025-08-28 03:59:22+00:00,USDC/USDT_univ3,1.000115,0.999982,,,0.0,-0.001727,,1.33064,...,1.0,0.002764,1.0,1.0,,,,,,0.747763
2228,2025-08-28 04:00:22+00:00,USDC/USDT_univ3,1.000268,1.000039,,,0.0,0.002689,,2.29069,...,1.0,0.002508,1.0,1.0,,0.004702,0.71868,0.0,0.0,0.749184
2229,2025-08-28 04:02:08+00:00,USDC/USDT_univ3,1.000136,1.000059,,,0.0,-0.004277,,0.773251,...,1.0,0.003892,1.0,0.0,,,,,,0.744028
2230,2025-08-28 04:03:30+00:00,USDC/USDT_univ3,0.999768,1.000091,,,,0.002406,,-3.224675,...,1.0,0.002928,1.0,0.0,,,,,,0.745633
2231,2025-08-28 20:49:13+00:00,USDC/USDT_univ3,1.000149,1.000149,1.000163,0.000163,,-0.001625,,0.0,...,1.0,0.00152,1.0,,,0.004702,1.0,8.0,0.467008,0.747316
2232,2025-08-28 20:49:13+00:00,DAI/USDC_univ3,0.999641,0.999641,0.999427,-0.000573,,0.007126,,-1.110622e-12,...,1.0,0.002418,1.0,,,0.004702,1.0,2.0,0.964808,0.745918
2233,2025-08-28 20:49:13+00:00,3pool_curve,1.000056,1.000056,0.999955,-4.5e-05,,-0.003111,1.000056,0.0,...,1.0,0.002075,1.0,,,0.004702,1.0,-8.0,0.467008,0.748683
2234,2025-08-28 20:50:37+00:00,USDC/USDT_univ3,0.999826,1.000084,0.999878,-0.000122,,9.6e-05,,-2.586003,...,1.0,0.002052,1.0,,,,,,,0.746995
2235,2025-08-28 20:50:37+00:00,DAI/USDC_univ3,0.999216,0.999556,0.999121,-0.000879,,-0.002207,,-3.396891,...,1.0,0.003243,1.0,,,,,,,0.748325
2236,2025-08-28 20:50:37+00:00,3pool_curve,0.999797,1.000004,0.999766,-0.000234,,0.002809,0.999797,-2.066987,...,1.0,0.001651,1.0,,,,,,,0.748106


In [109]:
!pip install transformers accelerate sentence-transformers 
!pip install bitsandbytes



In [110]:
!pip install faiss-cpu



In [111]:
import os, json, re
import numpy as np
import pandas as pd
from datetime import datetime, timezone
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [112]:
DEVICE = "cuda" if torch.cuda.is_available() else (
    "mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu"
)

In [113]:
def _best_dtype(device: str):
    if device == "cuda":
        try:
            major, _ = torch.cuda.get_device_capability(0)
            return torch.bfloat16 if major >= 8 else torch.float16
        except Exception:
            return torch.float16
    return torch.float32
DTYPE = _best_dtype(DEVICE)
print(f"[llm] device={DEVICE} dtype={DTYPE}")

[llm] device=cuda dtype=torch.float16


In [114]:
class HFSeq2Seq:
    """
    * Enforces encoder truncation (<= max_input_tokens).
    * Retries on CUDA OOM (halve lengths) then CPU fallback for the call.
    * Tries 4-bit (bnb) -> 8-bit -> fp16/bf16 depending on availability.
    * If 4-bit device_map='auto' would offload to CPU/disk (disallowed), retries without auto-map.
    """
    def __init__(
        self,
        model_name: str = "google/flan-t5-small",
        device: str = DEVICE,
        dtype = DTYPE,
        max_new_tokens: int = 128,
        max_input_tokens: int = 480,
        load_quant: str | None = "4bit"  
    ):
        self.model_name = model_name
        self.device = device
        self.dtype = dtype
        self.max_new_tokens = int(max_new_tokens)
        self.max_input_tokens = int(max_input_tokens)
        hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN", None)
        tok_kwargs = {}
        if hf_token:
            tok_kwargs["use_auth_token"] = hf_token
            tok_kwargs["token"] = hf_token
        self.tok = AutoTokenizer.from_pretrained(model_name, **tok_kwargs)
        try:
            self.tok.model_max_length = min(self.tok.model_max_length or 512, self.max_input_tokens)
        except Exception:
            pass
        self.model = self._load_with_fallbacks(model_name, device, dtype, load_quant)
    def _load_with_fallbacks(self, model_name, device, dtype, load_quant):
        if device == "cuda" and load_quant == "4bit":
            try:
                from transformers import BitsAndBytesConfig
                print(f"[llm] trying {model_name} in 4bit (auto-map)")
                q = BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_compute_dtype=dtype,
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_quant_type="nf4",
                )
                return AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    low_cpu_mem_usage=True,
                    torch_dtype=dtype,
                    quantization_config=q,
                    device_map="auto",
                )
            except ValueError as e:
                if "dispatched on the CPU or the disk" in str(e):
                    try:
                        from transformers import BitsAndBytesConfig
                        print(f"[llm] retry {model_name} 4bit (no auto-map)")
                        q = BitsAndBytesConfig(
                            load_in_4bit=True,
                            bnb_4bit_compute_dtype=dtype,
                            bnb_4bit_use_double_quant=True,
                            bnb_4bit_quant_type="nf4",
                        )
                        m = AutoModelForSeq2SeqLM.from_pretrained(
                            model_name,
                            low_cpu_mem_usage=True,
                            torch_dtype=dtype,
                            quantization_config=q,
                        )
                        return m.to("cuda")
                    except torch.cuda.OutOfMemoryError:
                        print("[llm] OOM loading 4bit fully on GPU; falling back to 8bit…")
                    except Exception as e2:
                        print(f"[llm] 4bit (no auto-map) failed: {e2}")
                else:
                    print(f"[llm] 4bit (auto) failed: {e}")
            except Exception as e:
                print(f"[llm] 4bit load skipped: {e}")
        if device == "cuda" and load_quant in ("4bit","8bit"):
            try:
                from transformers import BitsAndBytesConfig
                print(f"[llm] trying {model_name} in 8bit (auto-map + CPU offload)")
                q = BitsAndBytesConfig(load_in_8bit=True)
                return AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    low_cpu_mem_usage=True,
                    torch_dtype=dtype,
                    quantization_config=q,
                    device_map="auto",
                    llm_int8_enable_fp32_cpu_offload=True,
                )
            except Exception as e:
                print(f"[llm] 8bit load skipped: {e}")
        print(f"[llm] loading {model_name} in {('fp16/bf16' if device in ('cuda','mps') else 'fp32')} (no quant)")
        m = AutoModelForSeq2SeqLM.from_pretrained(
            model_name,
            low_cpu_mem_usage=True,
            torch_dtype=(dtype if device in ("cuda","mps") else torch.float32),
        )
        if device == "cuda":
            try:
                m = m.to("cuda")
            except torch.cuda.OutOfMemoryError:
                print("[llm] OOM moving model to GPU; keeping on CPU.")
        elif device == "mps":
            try:
                m = m.to("mps")
            except Exception:
                pass
        return m
    @torch.inference_mode()
    def generate(self, prompt: str, max_new_tokens: int | None = None, max_input_tokens: int | None = None) -> str:
        mt  = int(max_new_tokens or self.max_new_tokens)
        mit = int(max_input_tokens or self.max_input_tokens)
        def _encode(_mit):
            return self.tok(prompt, return_tensors="pt", truncation=True, max_length=_mit, padding="longest")
        def _gen(enc, _mt):
            enc = {k: v.to(self.model.device) for k, v in enc.items()}
            return self.model.generate(**enc, max_new_tokens=_mt, do_sample=False, use_cache=True, num_beams=1)
        try:
            enc = _encode(mit)
            out = _gen(enc, mt)
        except torch.cuda.OutOfMemoryError:
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass
            mt2, mit2 = max(64, mt//2), max(256, mit//2)
            try:
                enc = _encode(mit2)
                out = _gen(enc, mt2)
            except torch.cuda.OutOfMemoryError:
                print("[llm] OOM on GPU during gen — CPU fallback for this call.")
                self.model = self.model.to("cpu")
                enc = {k: v.to("cpu") for k, v in enc.items()}
                out = self.model.generate(**enc, max_new_tokens=mt2, do_sample=False, use_cache=True, num_beams=1)
        except RuntimeError as e:
            if "sequence length" in str(e).lower():
                enc = _encode(480)
                out = _gen(enc, mt)
            else:
                return f"[LLM-ERROR] {e}"
        return self.tok.decode(out[0], skip_special_tokens=True).strip()

In [115]:
T5_IE   = HFSeq2Seq(model_name="google/flan-t5-small", device=DEVICE, dtype=DTYPE, max_new_tokens=120, max_input_tokens=480, load_quant="4bit")
T5_NOTE = HFSeq2Seq(model_name="google/flan-t5-small", device=DEVICE, dtype=DTYPE, max_new_tokens=128, max_input_tokens=480, load_quant="4bit")
print("[llm] IE on", T5_IE.model.device, "| NOTE on", T5_NOTE.model.device)

[llm] trying google/flan-t5-small in 4bit (auto-map)
[llm] trying google/flan-t5-small in 4bit (auto-map)
[llm] IE on cuda:0 | NOTE on cuda:0


In [116]:
def _truncate(s: str, limit: int = 3000) -> str:
    try:
        return s if len(s) <= limit else s[:limit] + "\n... [truncated]"
    except Exception:
        return str(s)[:limit]

In [117]:
def _json_between(s: str):
    if not isinstance(s, str):
        return []
    m = list(re.finditer(r'\[.*\]|\{.*\}', s, flags=re.S))
    if not m:
        return []
    block = m[-1].group(0)
    try:
        return json.loads(block)
    except Exception:
        block2 = re.sub(r"(\w+)\s*:", r'"\1":', block).replace("'", '"')
        try:
            return json.loads(block2)
        except Exception:
            return []

In [118]:
def _now_iso() -> str:
    return datetime.now(timezone.utc).isoformat(timespec="seconds")
def _rag_hits(rag_obj, query: str, k: int = 5):
    try:
        return rag_obj.search(query, k=k)
    except Exception:
        return []

In [119]:
def ie_extract_events_t5(docs: list[dict], use_rag: bool = False, k_ctx: int = 5) -> list[dict]:
    if "T5_IE" not in globals():
        raise RuntimeError("T5_IE not initialized.")
    docs_min = []
    for d in (docs or [])[:12]:
        docs_min.append({
            "title": d.get("title") or d.get("status") or "doc",
            "text": (d.get("body") or d.get("msg") or d.get("summary") or "")[:400],
            "ts": d.get("ts") or _now_iso(),
            "source": d.get("source") or "",
        })
    ctx = []
    if use_rag and "RAG_B" in globals():
        hits = _rag_hits(RAG_B, query="stablecoin depeg OR curve status OR usdc usdt dai incident", k=k_ctx)
        ctx = [{"title": h.get("title",""), "text": str(h.get("text",""))[:300], "ts": h.get("ts",""), "source": h.get("source","")} for h in hits]
    instr = (
        "Extract risk events from DOCS (and CONTEXT if present). "
        "Return a JSON array. Each item has: type, severity, ts, summary, source. "
        "severity is 1..5 (5=highest). summary <= 140 chars. Only include depeg-relevant items."
    )
    payload = {"context": ctx, "docs": docs_min}
    prompt = f"{instr}\nDATA:\n{_truncate(json.dumps(payload, ensure_ascii=False), 3000)}\nJSON:"
    raw = T5_IE.generate(prompt, max_new_tokens=110, max_input_tokens=480)
    ev = _json_between(raw)
    out = []
    if isinstance(ev, list) and ev:
        for e in ev:
            sv = pd.to_numeric(e.get("severity", 1), errors="coerce")
            sv = int(1 if pd.isna(sv) else max(1, min(5, int(sv))))
            out.append({
                "type": str(e.get("type","information")),
                "severity": sv,
                "ts": str(e.get("ts") or _now_iso()),
                "summary": str(e.get("summary",""))[:140],
                "source": str(e.get("source","")),
            })
    else:
        s0 = docs_min[0]["source"] if docs_min else ""
        t0 = docs_min[0]["ts"] if docs_min else _now_iso()
        out = [{"type":"information","severity":1,"ts":t0,"summary":"(fallback) no actionable events parsed","source":s0}]
    return out

In [120]:
def reason_and_write_t5(feats_df: pd.DataFrame, events: list[dict], use_rag: bool = True, k_ctx: int = 3) -> dict:
    if "T5_NOTE" not in globals():
        raise RuntimeError("T5_NOTE not initialized.")
    cols = ["pool","dev","anom_fused","r0_delta","r1_delta","dev_roll_std",
            "tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio","risk_forecast_10m"]
    cols = [c for c in cols if isinstance(feats_df, pd.DataFrame) and c in feats_df.columns]
    onchain = (feats_df[cols].tail(12).replace([np.inf,-np.inf], np.nan).fillna(0.0).round(6)
               .to_dict(orient="records")) if cols else []
    ctxA, ctxB = [], []
    if use_rag and "RAG_A" in globals():
        a = _rag_hits(RAG_A, query="dex-oracle spread twap liquidity imbalance stablecoin", k=k_ctx)
        ctxA = [{"title": h.get("title",""), "text": str(h.get("text",""))[:280]} for h in a]
    if use_rag and "RAG_B" in globals():
        b = _rag_hits(RAG_B, query="curve status incident outage depeg", k=k_ctx)
        ctxB = [{"title": h.get("title",""), "text": str(h.get("text",""))[:280], "source": h.get("source","")} for h in b]
    citations = []
    for e in (events or [])[:5]:
        src = e.get("source"); 
        if src: citations.append(str(src))
    for h in ctxB:
        src = h.get("source")
        if src and src not in citations:
            citations.append(src)
    instr = (
        "You are a DeFi risk analyst. Using ONCHAIN, EVENTS, and CONTEXT, output JSON with:\n"
        "- risk_score (0..100 integer)\n"
        "- analyst_note (<=200 words)\n"
        "- actions (<=3 items, each {title, rationale})\n"
        "Emphasize DEX-oracle dev, anom_fused, TVL outflow, spot–TWAP gap, and event severity. "
        "If risk_forecast_10m exists, factor it in. No markdown in JSON."
    )
    data = {
        "onchain": onchain,
        "events": [{"summary": e.get("summary","")[:160], "severity": int(e.get("severity",1)), "source": e.get("source","")} for e in (events or [])[:8]],
        "context": {"kb": ctxA, "news": ctxB}
    }
    prompt = f"{instr}\nDATA:\n{_truncate(json.dumps(data, ensure_ascii=False), 3000)}\nJSON:"
    raw = T5_NOTE.generate(prompt, max_new_tokens=120, max_input_tokens=480)
    obj = _json_between(raw)
    def _fallback():
        risk_now = float(np.nanmax([r.get("anom_fused", 0.0) for r in onchain]) if onchain else 0.0)
        p10 = float(np.nanmax([r.get("risk_forecast_10m", 0.0) for r in onchain]) if onchain else 0.0)
        rs = int(max(0, min(100, round(100 * max(risk_now, p10)))))
        note = "Monitor spreads and TVL outflows; no strong signals parsed. Re-run in 5 minutes."
        acts = [
            {"title":"Monitor in 5m","rationale":"Confirm spreads, TWAP gap, and event freshness"},
            {"title":"Reroute if |dev|>0.6%","rationale":"Avoid slippage during transient depegs"},
            {"title":"Escalate on 3 reds","rationale":"Trigger mitigation webhook with human ack"},
        ]
        return {"risk_score": rs, "analyst_note": note, "actions": acts}
    if not isinstance(obj, dict) or "risk_score" not in obj:
        obj = _fallback()
    try:
        rs = int(max(0, min(100, int(pd.to_numeric(obj.get("risk_score", 0), errors="coerce") or 0))))
    except Exception:
        rs = _fallback()["risk_score"]
    note = str(obj.get("analyst_note", ""))[:1200]
    acts = obj.get("actions", [])
    if not isinstance(acts, list):
        acts = []
    acts2 = [{"title": str(a.get("title","Action"))[:80], "rationale": str(a.get("rationale",""))[:200]} for a in acts[:3]]
    return {"risk_score": rs, "analyst_note": note, "actions": acts2, "citations": citations[:6]}

In [121]:
from fastapi import FastAPI, Query
from typing import List, Optional
import pandas as pd

In [122]:
app = FastAPI(title="Depeg Sentinel MCP", version="2.0.0")

In [123]:
@app.get("/ml/score_zoo")
def score_zoo(pools: Optional[List[str]] = Query(default=None)):
    df = run_anomaly_zoo_update_live()
    if pools:
        df = df[df["pool"].isin(pools)]
    return df[["ts","pool","z_if","z_lof","z_ocsvm","z_cusum","z_ae","z_ae_seq","anom_fused"]].tail(200).to_dict("records")

In [124]:
@app.get("/ml/forecast")
def forecast(pools: Optional[List[str]] = Query(default=None), horizon: List[int] = Query(default=[10])):
    out = {}
    if 10 in horizon:
        out["h10"] = score_latest_10m(n_tail=200, write_parquet=False).to_dict("records")
    if pools:
        for k in out:
            out[k] = [r for r in out[k] if r["pool"] in pools]
    return out
@app.get("/ml/explain")
def explain(pools: Optional[List[str]] = Query(default=None)):
    exp = explain_forecast_10m()
    return exp
@app.get("/intel/top_events")
def top_events(since: Optional[str] = None):
    ev = _load_events()
    if since:
        ev = [e for e in ev if str(e.get("ts","")) >= since]
    ev.sort(key=lambda e: int(e.get("severity", 0)), reverse=True)
    return ev[:50]
@app.get("/signals/network")
def signals_network(pools: Optional[List[str]] = Query(default=None)):
    df = compute_network_features()
    if pools:
        df = df[df["pool"].isin(pools)]
    return df.to_dict("records")
@app.post("/policy/decide")
def policy_decide_api(state: dict | None = None):
    df = decide_latest(n_tail=20)
    return df.tail(1).to_dict("records")[0]
@app.get("/policy/retrain_check")
def retrain_check():
    return {"should_retrain": True, "reason": "scheduled nightly or drift threshold (PR-AUC drop) hit"}
@app.get("/policy/snapshot")
def snapshot():
    note = build_analyst_note_v2()
    rep = nightly_report()
    return {"note": note, "report": rep}

In [125]:
PROFILE = "balanced_fast"  
PROFILES = {
    "blitz":          {"LOOP_SECS": 10, "TRAIN_EVERY": 180},   
    "balanced_fast":  {"LOOP_SECS": 30, "TRAIN_EVERY": 120},   
    "cheap":          {"LOOP_SECS": 60, "TRAIN_EVERY": 120},   
}
FAST_CFG = {
    "TAIL_WIN":     500,  
    "EVENTS_EVERY": 6,    
    "NETWORK_EVERY":2,    
    "META_EVERY":   2,   
}
LOOP_SECS = PROFILES[PROFILE]["LOOP_SECS"]
TRAIN_EVERY = PROFILES[PROFILE]["TRAIN_EVERY"]

In [126]:
import time, math
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone, timedelta
import numpy as np
import pandas as pd

In [127]:
def _unique_oracle_addrs() -> list[str]:
    return list({a for a in CFG.chainlink_feeds.values() if a})
def _fetch_oracles_parallel() -> dict[str, OnchainResult]:
    addrs = _unique_oracle_addrs()
    out: dict[str, OnchainResult] = {}
    if not addrs: 
        return out
    with ThreadPoolExecutor(max_workers=min(8, len(addrs))) as ex:
        futs = {ex.submit(onchain.get_oracle_price, a): a for a in addrs}
        for fut in as_completed(futs):
            a = futs[fut]
            try:
                out[a] = fut.result()
            except Exception as e:
                out[a] = OnchainResult(False, _now_iso(), None, {}, str(e))
    return out

In [128]:
def sample_once_parallel(ts_override: str | None = None) -> pd.DataFrame:
    """Faster sample: parallel oracles + pools. Optional timestamp override for bootstrapping."""
    ts_now = ts_override or _now_iso()
    oracle_res = _fetch_oracles_parallel()
    def _oracle_px_for(sym: str) -> float:
        feed = CFG.chainlink_feeds.get(sym, "")
        r = oracle_res.get(feed)
        return float(r.data.get("price")) if (r and r.ok and r.data.get("price") is not None) else float("nan")
    def _fetch_pool(pool_name: str, meta: dict) -> dict:
        typ  = meta.get("type"); addr = meta.get("address"); sym = meta.get("symbol", "USDC/USD")
        dex_spot = np.nan; virt_price = np.nan; r0 = r1 = np.nan
        if typ == "uniswap_v3":
            uni = onchain.get_univ3_price(addr)
            res = onchain.get_reserves_min(addr)
            if uni and uni.ok: dex_spot = float(uni.data.get("price", np.nan))
            if res and res.ok: r0 = float(res.data.get("r0", np.nan)); r1 = float(res.data.get("r1", np.nan))
        elif typ == "curve":
            cur = onchain.get_curve_virtual_price(addr)
            res = onchain.get_reserves_min(addr)
            if cur and cur.ok: virt_price = float(cur.data.get("virtual_price", np.nan))
            if res and res.ok: r0 = float(res.data.get("r0", np.nan)); r1 = float(res.data.get("r1", np.nan))
        return {
            "ts": ts_now, "pool": pool_name,
            "dex_spot": dex_spot, "virtual_price": virt_price,
            "oracle_px": _oracle_px_for(sym),
            "r0": r0, "r1": r1, "block": None,
        }
    rows: list[dict] = []
    with ThreadPoolExecutor(max_workers=min(8, len(CFG.pools))) as ex:
        futs = {ex.submit(_fetch_pool, name, meta): name for name, meta in CFG.pools.items()}
        for fut in as_completed(futs):
            try: rows.append(fut.result())
            except Exception: pass
    feat = engineer_features(rows)
    append_live(feat)
    return feat

In [129]:
def run_anomaly_zoo_update_live_fast(tail_win: int = 500, features_cols: list[str] | None = None) -> pd.DataFrame:
    df = load_live()
    if df.empty or df["dev"].isna().all(): return df
    if not features_cols:
        features_cols = ["dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio"]
    use_cols = [c for c in features_cols if c in df.columns]
    if not use_cols: return df.tail(1)
    tail = df.tail(tail_win).copy()
    X = tail[use_cols].astype(float).replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    if _SK_OK:
        from sklearn.preprocessing import StandardScaler
        Xs = StandardScaler().fit_transform(X)
    else:
        Xs = X
    z_if = np.zeros(len(tail)); z_lof = np.zeros(len(tail)); z_ocsvm = np.zeros(len(tail))
    if _SK_OK:
        from sklearn.ensemble import IsolationForest
        from sklearn.neighbors import LocalOutlierFactor
        from sklearn.svm import OneClassSVM
        try:
            z_if = _scale_01(-IsolationForest(n_estimators=150, contamination="auto", random_state=SEED, n_jobs=-1).fit(Xs).score_samples(Xs))
        except Exception: pass
        try:
            lof2 = LocalOutlierFactor(n_neighbors=20, novelty=True).fit(Xs)
            z_lof = _scale_01(-lof2.score_samples(Xs))
        except Exception:
            try:
                z_lof = _scale_01((-LocalOutlierFactor(n_neighbors=20).fit_predict(Xs)).astype(float))
            except Exception: pass
        try:
            z_ocsvm = _scale_01(-OneClassSVM(kernel="rbf", gamma="scale", nu=0.05).fit(Xs).decision_function(Xs))
        except Exception: pass
    z_cus_all = []
    for _, g in df.sort_values(["pool","ts"]).groupby("pool", sort=False):
        z_cus_all.append(_cusum_score(g["dev"]))
    z_cus = pd.concat(z_cus_all).reindex(tail.index).fillna(0.0).to_numpy()
    z_cus = _scale_01(z_cus)
    try:
        z_ae = _autoencoder_scores(tail, Xs)
    except Exception:
        z_ae = np.zeros(len(tail))
    fused = np.nanmax(np.vstack([z_if, z_lof, z_ocsvm, z_cus, z_ae]), axis=0)
    for k, arr in [("z_if",z_if),("z_lof",z_lof),("z_ocsvm",z_ocsvm),("z_cusum",z_cus),("z_ae",z_ae),("anom_fused",fused)]:
        tail[k] = arr
    live = load_live()
    cols = ["z_if","z_lof","z_ocsvm","z_cusum","z_ae","anom_fused"]
    live.loc[tail.index, cols] = tail[cols]
    write_live(live)
    return tail.tail(10)

In [130]:
def _gen_series_step(prev, vol=1e-4, mean=0.0, jump_prob=0.02, jump_bps=30.0, floor=None, ceil=None):
    if prev is None or not np.isfinite(prev): prev = 1.0
    step = np.random.normal(mean, vol)
    shock = np.random.normal(0.0, jump_bps)/1e4 if (np.random.rand() < float(jump_prob)) else 0.0
    nxt = prev * (1.0 + step + shock)
    if floor is not None: nxt = max(floor, nxt)
    if ceil  is not None: nxt = min(ceil,  nxt)
    return float(nxt)

In [131]:
def hyperbootstrap(target_rows: int = 800, step_seconds: int = 15, batch_size: int = 500):
    """
    Fill live_dataset.csv to 'target_rows' quickly with realistic synthetic ticks.
    Uses current state for seed; writes in big batches for speed; runs zoo+train once at end.
    """
    try: latest_real = sample_once_parallel()
    except Exception: latest_real = sample_once()
    live0 = load_live(); have = int(len(live0))
    if have >= target_rows:
        print(f"[hyperbootstrap] already have {have} rows; nothing to do.")
        return
    latest = load_live().sort_values("ts").groupby("pool").tail(1)
    if latest.empty: latest = latest_real
    pools = list(latest["pool"].unique()); P = max(1, len(pools))
    steps_needed = int(math.ceil((target_rows - have)/P))
    print(f"[hyperbootstrap] need ~{steps_needed} synthetic steps ({P} pools) → target_rows={target_rows}")
    state = {}
    for _, r in latest.iterrows():
        p = r["pool"]
        state[p] = {
            "dex_spot":      float(r.get("dex_spot", 1.0)),
            "oracle_px":     float(r.get("oracle_px", 1.0)),
            "virtual_price": float(r.get("virtual_price", 1.0)),
            "r0":            float(r.get("r0", 1e9)),
            "r1":            float(r.get("r1", 1e9)),
        }
    now = datetime.now(timezone.utc)
    start = now - timedelta(seconds=steps_needed * step_seconds)
    buf = []
    for i in range(steps_needed):
        ts_i = (start + timedelta(seconds=i * step_seconds)).isoformat(timespec="seconds")
        for p in pools:
            st = state[p]
            new_oracle = _gen_series_step(st["oracle_px"],     vol=2e-5,  jump_prob=0.01, jump_bps=5.0,  floor=0.95, ceil=1.05)
            new_spot   = _gen_series_step(st["dex_spot"],      vol=6e-5,  jump_prob=0.03, jump_bps=40., floor=0.90, ceil=1.10)
            new_vp     = _gen_series_step(st["virtual_price"], vol=1.5e-5,jump_prob=0.005,jump_bps=3.0, floor=0.98, ceil=1.02)
            r0 = float(st["r0"] * (1.0 + np.random.normal(0, 2e-3)))
            r1 = float(st["r1"] * (1.0 + np.random.normal(0, 2e-3)))
            buf.append({"ts": ts_i, "pool": p, "dex_spot": new_spot, "oracle_px": new_oracle,
                        "virtual_price": new_vp, "r0": r0, "r1": r1, "block": None})
            st["dex_spot"], st["oracle_px"], st["virtual_price"], st["r0"], st["r1"] = new_spot, new_oracle, new_vp, r0, r1
        if len(buf) >= batch_size or (i+1) == steps_needed:
            feat = engineer_features(buf); append_live(feat); buf = []
            if (i+1) % (batch_size*1) == 0: print(f"[hyperbootstrap] step {i+1}/{steps_needed}")
    run_anomaly_zoo_update_live_fast(tail_win=FAST_CFG["TAIL_WIN"])
    df_all = load_live()
    if "y_10m" not in df_all.columns or df_all["y_10m"].nunique() < 2:
        df_all["y_10m"] = _ensure_labels(df_all); write_live(df_all)
    try: train_forecaster_10m()
    except Exception as e: print(f"[hyperbootstrap] forecaster train skipped: {e}")
    score_latest_10m(write_parquet=False)
    explain_forecast_10m()
    write_run_meta({"bootstrap": True})
    print("[hyperbootstrap] complete.")

In [132]:
_loop_i = 0
def job_sample_and_score_fast():
    """Hot path every loop; stagger heavy stuff."""
    global _loop_i
    _loop_i += 1
    t0 = time.time()
    sample_once_parallel()
    run_anomaly_zoo_update_live_fast(FAST_CFG["TAIL_WIN"])
    score_latest_10m(write_parquet=False)
    if _loop_i % FAST_CFG["NETWORK_EVERY"] == 0:
        compute_network_features()
    if _loop_i % FAST_CFG["EVENTS_EVERY"] == 0:
        update_events_from_sources()
        enrich_events_and_aggregate()
    if _loop_i % TRAIN_EVERY == 0:
        try: train_forecaster_10m()
        except Exception: pass
    if _loop_i % FAST_CFG["META_EVERY"] == 0:
        write_run_meta()
    dt = time.time() - t0
    sleep_s = max(5, LOOP_SECS - dt)
    print(f"[loop {_loop_i}] {dt:.2f}s → sleep {sleep_s:.2f}s")
    time.sleep(sleep_s) 

In [133]:
if __name__ == "__main__":
    hyperbootstrap(target_rows=800, step_seconds=15, batch_size=500)
    while True:
        job_sample_and_score_fast()

[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv
[hyperbootstrap] already have 2240 rows; nothing to do.
[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv
[loop 1] 1.45s → sleep 28.55s
[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[network] v2 updated neighbor features (no NaNs)
[ok] RUN_META.json → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\RUN_META.json
[loop 2] 2.27s → sleep 27.73s


KeyboardInterrupt: 

In [134]:
job_sample_and_score_fast()

[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv
[loop 3] 1.62s → sleep 28.38s


KeyboardInterrupt: 

In [135]:
import time

In [136]:
LOOP_SECS = 60          
TRAIN_EVERY = 240       
FAST_CFG["EVENTS_EVERY"]  = 12
FAST_CFG["NETWORK_EVERY"] = 4
FAST_CFG["META_EVERY"]    = 4
for _ in range(30):     
    job_sample_and_score_fast()

[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[network] v2 updated neighbor features (no NaNs)
[ok] RUN_META.json → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\RUN_META.json
[loop 4] 2.15s → sleep 57.85s


KeyboardInterrupt: 

In [137]:
if _loop_i % TRAIN_EVERY == 0:
    try:
        train_if_needed(force=False, min_hours=4.0, min_new_rows=400, label_drift_thr=0.15)
    except Exception as e:
        print("[train_if_needed] error:", e)

In [138]:
try:
    rotate_live_csv(max_mb=100)
except Exception as e:
    print("[rotate] skip:", e)

[rotate] skip: name 'rotate_live_csv' is not defined


In [139]:
WEBHOOK = os.environ.get("ALERT_WEBHOOK", "").strip()
def alerts_enabled() -> bool:
    return bool(WEBHOOK and WEBHOOK.startswith("http"))

In [140]:
WEBHOOK = "https://hooks.slack.com/services/XXX/YYY/ZZZ"

In [141]:
if alerts_enabled() and (_loop_i % 3 == 0):
    try:
        trigger_alerts_if_needed(WEBHOOK, n_tail=80, min_rows_for_incidents=60)
    except Exception as e:
        print("[alert] skip:", e)

In [142]:
try:
    WEBHOOK
except NameError:
    WEBHOOK = ""

In [143]:
def trigger_alerts_if_needed(webhook_url: str, n_tail: int = 80, min_rows_for_incidents: int = 60) -> bool:
    """Post 'red' incidents to a webhook; dedup + cooldown built in."""
    if not webhook_url:
        print("[alert] no webhook configured")
        return False
    try:
        import requests
    except Exception as e:
        print("[alert] requests not available:", e)
        return False
    try:
        dec = decide_latest(n_tail=n_tail, min_rows_for_incidents=min_rows_for_incidents)
    except Exception as e:
        print("[alert] decide_latest failed:", e)
        return False
    reds = dec[dec["level"] == "red"] if isinstance(dec, pd.DataFrame) else pd.DataFrame()
    if reds.empty:
        print("[alert] no red alerts")
        return True
    payload = reds.to_dict(orient="records")
    envelope = {"alerts": payload, "ts": _now_iso()}
    blob = json.dumps(envelope, sort_keys=True)
    h = hashlib.sha1(blob.encode()).hexdigest()
    now = time.time()
    if _last_alert["hash"] == h and (now - _last_alert["ts"] < ALERT_COOLDOWN):
        print("[alert] suppressed (duplicate/cooldown)")
        return True
    try:
        r = requests.post(webhook_url, json=envelope, timeout=8)
        print(f"[alert] posted {len(payload)} red alerts → {r.status_code}")
        if 200 <= r.status_code < 300:
            _last_alert.update({"ts": now, "hash": h})
            return True
        return False
    except Exception as e:
        print("[alert] post failed:", e)
        return False

In [144]:
if WEBHOOK and (_loop_i % 3 == 0):
    trigger_alerts_if_needed(WEBHOOK, n_tail=80, min_rows_for_incidents=60)

In [145]:
try:
    explain_forecast_10m(n_repeats=6)
    print(build_analyst_note_v2())
    print(nightly_report())
except Exception as e:
    print("[nightly] skip:", e)
write_run_meta({"stopped_at_loop": int(_loop_i), "cadence_s": int(LOOP_SECS)})

[explain] wrote C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\explain.json


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[network] v2 updated neighbor features (no NaNs)
[ok] Analyst Note exported → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\analyst_note.pdf
{'ok': True, 'note': 'Fused anomaly now=0.91; 10-min risk=1.00; 30-min risk=0.86. Confidence High. Top drivers: dev_roll_std (+0.0812±0.0074 AP); oracle_ratio (+0.0109±0.0020 AP); spot_twap_gap_bps (+0.0015±0.0032 AP) Propagation: DAI/USDC_univ3 lags peers (corr=0.80). Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).', 'pdf': 'C:\\Users\\aniru\\OneDrive\\Desktop\\ML tutorial\\DEFI Depeg sentinel\\analyst_note.pdf'}
[report] decide_latest failed: name 'decide_latest' is not defined
[ok] Markdown PDF exported → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\report.pdf
{'ok': True, 'markdown': '# Nightly Model Report\nGenerated: 2025-08-28T21:03:42+00:00\n\n## Winner Detector Today\n- **Winner:** `z_ae`  (by AP proxy on |dev|≥0.3%)\n\nAP scores:\n- z_ae: 0.78

WindowsPath('C:/Users/aniru/OneDrive/Desktop/ML tutorial/DEFI Depeg sentinel/RUN_META.json')

In [146]:
if "risk_forecast_10m" not in CANON_COLS:
    CANON_COLS.append("risk_forecast_10m")

In [147]:
def attach_forecast_to_live(n_tail: int = 200):
    f = score_latest_10m(n_tail=n_tail, write_parquet=False)[["ts","pool","risk_forecast_10m"]]
    live = load_live()
    if live.empty or f.empty:
        print("[attach] nothing to merge"); return live
    live["ts_str"] = _iso_str(live["ts"])
    f["ts_str"]    = _iso_str(f["ts"])
    live_idx = live.set_index(["pool","ts_str"])
    upd_idx  = f.set_index(["pool","ts_str"])[["risk_forecast_10m"]]
    live_idx.update(upd_idx)
    live_updated = live_idx.reset_index().drop(columns=["ts_str"])
    write_live(live_updated)
    return live_updated

In [148]:
live2 = attach_forecast_to_live(n_tail=200)
print(live2.sort_values("ts").tail(6)[["ts","pool","dev","anom_fused","risk_forecast_10m","feeds_fresh"]])

                            ts                       pool       dev  \
2250 2025-08-28 21:02:23+00:00             DAI/USDC_univ3       NaN   
2249 2025-08-28 21:02:23+00:00            USDC/USDT_univ3       NaN   
2251 2025-08-28 21:02:23+00:00                3pool_curve -0.000107   
170                        NaT  2025-08-23 22:38:02+00:00 -0.000161   
171                        NaT  2025-08-23 22:38:02+00:00 -0.000122   
172                        NaT  2025-08-23 22:38:02+00:00 -0.000617   

      anom_fused  risk_forecast_10m  feeds_fresh  
2250    0.018041           0.747306          1.0  
2249    1.000000           0.830525          1.0  
2251    0.910041           1.000000          1.0  
170     1.000000                NaN          NaN  
171     1.000000                NaN          NaN  
172     1.000000                NaN          NaN  


  live_idx.update(upd_idx)


In [149]:
def seed_state_from_live():
    df = load_live()
    if df.empty:
        print("[resume] no live data yet; nothing to seed")
        return False
    last = df.sort_values("ts").groupby("pool", as_index=False).tail(1)
    seeded = 0
    for _, r in last.iterrows():
        p = r["pool"]
        tw = r.get("dex_twap")
        sp = r.get("dex_spot")
        if pd.notna(tw):
            _TWAP.state[p] = float(tw); seeded += 1
        elif pd.notna(sp):
            _TWAP.state[p] = float(sp); seeded += 1
    print(f"[resume] TWAP seeds: {seeded} pools")
    try:
        compute_network_features()
    except Exception as e:
        print("[resume] network seed skipped:", e)
    try:
        explain_forecast_10m(n_repeats=4)
    except Exception:
        pass
    print("[resume] state seeded from live; continue cruising.")
    return True
_ = seed_state_from_live()

[resume] TWAP seeds: 4 pools


  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)
  live_idx.update(upd_idx)


[network] v2 updated neighbor features (no NaNs)
[explain] wrote C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\explain.json
[resume] state seeded from live; continue cruising.


In [150]:
LOOP_SECS   = 60      
TRAIN_EVERY = 240      

In [151]:
import os
WEBHOOK = os.environ.get("ALERT_WEBHOOK", "").strip()

In [152]:
if '_loop_i' not in globals():
    _loop_i = 0
for _ in range(30):
    job_sample_and_score_fast()  
    if _loop_i % TRAIN_EVERY == 0:
        try:
            train_if_needed(force=False, min_hours=4.0, min_new_rows=400, label_drift_thr=0.15)
        except Exception as e:
            print("[train_if_needed] error:", e)
    if WEBHOOK and (_loop_i % 3 == 0):
        try:
            trigger_alerts_if_needed(WEBHOOK, n_tail=80, min_rows_for_incidents=60)
        except Exception as e:
            print("[alert] skip:", e)

[append] 3 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv
[loop 5] 0.97s → sleep 59.03s


KeyboardInterrupt: 

In [153]:
from datetime import datetime, timezone
import pandas as pd
import numpy as np
import math
import json

In [154]:
def _utc_now_ts() -> int:
    return int(datetime.now(timezone.utc).timestamp())

In [155]:
def compute_feeds_fresh_gate(heartbeat_s: int = 120, max_block_lag_s: int = 120) -> dict:
    """
    Checks:
      - Each Chainlink feed's `updatedAt` against now.
      - Latest on-chain block timestamp (when not mock).
    Returns dict with details + a boolean 'feeds_fresh'.
    """
    details = {"now": _utc_now_ts(), "feeds": [], "block": {}, "feeds_fresh": True}
    for sym, addr in getattr(CFG, "chainlink_feeds", {}).items():
        r = onchain.get_oracle_price(addr)
        upd = int(r.data.get("updatedAt", 0)) if (r and r.ok and "updatedAt" in r.data) else 0
        lag = details["now"] - upd if upd > 0 else math.inf
        ok  = (lag <= heartbeat_s)
        details["feeds"].append({"symbol": sym, "updatedAt": upd, "lag_s": lag, "ok": ok})
        if not ok:
            details["feeds_fresh"] = False
    try:
        if (getattr(onchain, "mock", True) is False) and getattr(onchain, "w3", None) is not None:
            latest = onchain.w3.eth.get_block("latest")
            blk_ts = int(latest.timestamp)
            blk_lag = details["now"] - blk_ts
            details["block"] = {"timestamp": blk_ts, "lag_s": blk_lag, "ok": blk_lag <= max_block_lag_s}
            if blk_lag > max_block_lag_s:
                details["feeds_fresh"] = False
        else:
            details["block"] = {"timestamp": None, "lag_s": None, "ok": True}
    except Exception as e:
        details["block"] = {"timestamp": None, "lag_s": None, "ok": False, "err": str(e)}
        details["feeds_fresh"] = False
    return details

In [156]:
def stamp_feeds_fresh_on_live(heartbeat_s: int = 120, max_block_lag_s: int = 120) -> dict:
    """Writes `feeds_fresh` onto the **latest row per pool** in live CSV (True/False)."""
    gate = compute_feeds_fresh_gate(heartbeat_s=heartbeat_s, max_block_lag_s=max_block_lag_s)
    df = load_live()
    if df.empty:
        return gate
    latest = df.sort_values("ts").groupby("pool", as_index=False).tail(1).copy()
    latest["feeds_fresh"] = bool(gate["feeds_fresh"])
    live = load_live()
    live["ts_str"] = live["ts"].dt.strftime("%Y-%m-%d %H:%M:%S%z")
    latest["ts_str"] = latest["ts"].dt.strftime("%Y-%m-%d %H:%M:%S%z")
    idx_live = live.set_index(["pool","ts_str"])
    idx_upd  = latest.set_index(["pool","ts_str"])[["feeds_fresh"]]
    idx_live.update(idx_upd)
    out = idx_live.reset_index().drop(columns=["ts_str"])
    write_live(out)
    return gate

In [157]:
_UNIV3_OBS_ABI = [
    {"name":"observe","inputs":[{"type":"uint32[]","name":"secondsAgos"}],
     "outputs":[{"type":"int56[]","name":"tickCumulatives"},
                {"type":"uint160[]","name":"secondsPerLiquidityCumulativeX128"}],
     "stateMutability":"view","type":"function"},
    {"name":"token0","inputs":[],"outputs":[{"type":"address"}], "stateMutability":"view","type":"function"},
    {"name":"token1","inputs":[],"outputs":[{"type":"address"}], "stateMutability":"view","type":"function"},
]
_ERC20_DEC_ABI = [
    {"name":"decimals","inputs":[],"outputs":[{"type":"uint8"}], "stateMutability":"view","type":"function"},
    {"name":"symbol","inputs":[],"outputs":[{"type":"string"}],   "stateMutability":"view","type":"function"},
]

In [158]:
def _tick_to_price_01(tick: float) -> float:
    return float(1.0001 ** tick)

In [159]:
def get_univ3_twap_price(pool_addr: str, window_s: int = 300) -> float:
    """
    Real TWAP over `window_s` using Uniswap v3 observe().
    Returns price accounting for token decimals (token0/token1).
    """
    if getattr(onchain, "mock", True) or not getattr(onchain, "w3", None):
        uni = onchain.get_univ3_price(pool_addr)
        return float(uni.data.get("price", np.nan)) if (uni and uni.ok) else np.nan
    from web3 import Web3
    pool = Web3.to_checksum_address(pool_addr)
    c = onchain.w3.eth.contract(address=pool, abi=_UNIV3_OBS_ABI)
    secs = [window_s, 0]
    tc, _spl = c.functions.observe(secs).call()
    tick_avg = (tc[-1] - tc[0]) / float(window_s)
    t0 = c.functions.token0().call()
    t1 = c.functions.token1().call()
    t0c = onchain.w3.eth.contract(address=t0, abi=_ERC20_DEC_ABI)
    t1c = onchain.w3.eth.contract(address=t1, abi=_ERC20_DEC_ABI)
    d0 = int(t0c.functions.decimals().call())
    d1 = int(t1c.functions.decimals().call())
    p_01 = _tick_to_price_01(tick_avg)            
    price = p_01 * (10 ** (d0 - d1))              
    return float(price)

In [160]:
def refresh_true_twap_and_gap(window_s: int = 300) -> pd.DataFrame:
    """
    For Uniswap v3 pools in CFG.pools:
      - compute real TWAP via observe()
      - update latest row's `dex_twap` and `spot_twap_gap_bps = (dex_spot - dex_twap)/dex_twap * 1e4`
    """
    df = load_live()
    if df.empty:
        return df
    df = df.sort_values("ts")
    latest = df.groupby("pool", as_index=False).tail(1).copy()
    updates = []
    for name, meta in getattr(CFG, "pools", {}).items():
        if meta.get("type") != "uniswap_v3":
            continue
        addr = meta["address"]
        try:
            twap = get_univ3_twap_price(addr, window_s=window_s)
        except Exception:
            twap = np.nan
        sp = latest.loc[latest["pool"] == name, "dex_spot"].astype(float).fillna(np.nan)
        sp = float(sp.iloc[0]) if len(sp) else np.nan
        gap_bps = ((sp - twap) / twap * 1e4) if (np.isfinite(sp) and np.isfinite(twap) and twap != 0) else np.nan
        updates.append({"pool": name, "dex_twap": twap, "spot_twap_gap_bps": gap_bps})
    if not updates:
        return df.tail(6)
    upd = pd.DataFrame(updates)
    latest = latest.drop(columns=["dex_twap","spot_twap_gap_bps"], errors="ignore").merge(upd, on="pool", how="left")
    live = load_live()
    live["ts_str"]   = live["ts"].dt.strftime("%Y-%m-%d %H:%M:%S%z")
    latest["ts_str"] = latest["ts"].dt.strftime("%Y-%m-%d %H:%M:%S%z")
    idx_live = live.set_index(["pool","ts_str"])
    idx_upd  = latest.set_index(["pool","ts_str"])[["dex_twap","spot_twap_gap_bps"]]
    idx_live.update(idx_upd)
    out = idx_live.reset_index().drop(columns=["ts_str"])
    write_live(out)
    return out.sort_values("ts").tail(10)

In [161]:
from pydantic import BaseModel, Field, field_validator, ConfigDict, FieldValidationInfo
from typing import Optional, Any
import pandas as pd
import numpy as np

  from pydantic import BaseModel, Field, field_validator, ConfigDict, FieldValidationInfo


In [162]:
def _is_na(v: Any) -> bool:
    try:
        if v is None: return True
        if isinstance(v, float) and (np.isnan(v) or np.isinf(v)): return True
        if isinstance(v, str) and v.strip() == "": return True
        return bool(pd.isna(v))
    except Exception:
        return False

In [163]:
class LiveRowModel(BaseModel):
    model_config = ConfigDict(extra='allow')  
    ts: str
    pool: str
    dex_spot: Optional[float] = None
    oracle_px: Optional[float] = None
    dev: Optional[float] = None
    dex_twap: Optional[float] = None
    oracle_ratio: Optional[float] = None
    dev_roll_std: Optional[float] = Field(default=None, ge=0)
    tvl_outflow_rate: Optional[float] = None
    virtual_price: Optional[float] = Field(default=None, ge=0)
    spot_twap_gap_bps: Optional[float] = None
    r0: Optional[float] = Field(default=None, ge=0)
    r1: Optional[float] = Field(default=None, ge=0)
    feeds_fresh: bool = False
    run_quality_pass: bool = True
    y_10m: Optional[int] = Field(default=None, ge=0, le=1)
    y_30m: Optional[int] = Field(default=None, ge=0, le=1)
    block: Optional[int] = Field(default=None, ge=0)
    @field_validator('ts', mode='before')
    @classmethod
    def _ts_to_str(cls, v: Any) -> str:
        if isinstance(v, pd.Timestamp):
            v = v.tz_localize('UTC') if v.tzinfo is None else v.tz_convert('UTC')
            return v.strftime("%Y-%m-%d %H:%M:%S%z")
        return str(v)
    @field_validator('dex_spot','oracle_px','dev','dex_twap','oracle_ratio',
                     'dev_roll_std','tvl_outflow_rate','virtual_price',
                     'spot_twap_gap_bps','r0','r1', mode='before')
    @classmethod
    def _float_nan_to_none(cls, v: Any):
        if _is_na(v): return None
        try: return float(v)
        except Exception: return None
    @field_validator('y_10m','y_30m','block', mode='before')
    @classmethod
    def _int_nan_to_none(cls, v: Any, info: FieldValidationInfo):
        if _is_na(v): return None
        try:
            iv = int(float(v))
        except Exception:
            return None
        if info.field_name in ('y_10m','y_30m'):
            return 1 if iv >= 1 else 0
        return iv
    @field_validator('feeds_fresh','run_quality_pass', mode='before')
    @classmethod
    def _boolify(cls, v: Any) -> bool:
        if isinstance(v, (bool, np.bool_)): return bool(v)
        if _is_na(v): return False
        s = str(v).strip().lower()
        if s in ('1','true','yes','y','on'): return True
        if s in ('0','false','no','n','off','nan','none',''): return False
        try: return bool(int(float(v)))
        except Exception: return False

In [164]:
def _coerce_bool_series(s: pd.Series) -> pd.Series:
    return s.apply(lambda v: True if str(v).strip().lower() in ('1','true','yes','y','on')
                   else False if str(v).strip().lower() in ('0','false','no','n','off','nan','none','')
                   else (bool(v) if isinstance(v, (bool, np.bool_)) else False))

In [165]:
def _coerce_df_min(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    for c in ("ts","pool","dex_spot","oracle_px","dev","feeds_fresh","run_quality_pass"):
        if c not in df.columns:
            df[c] = np.nan if c not in ("feeds_fresh","run_quality_pass") else False
    if not pd.api.types.is_string_dtype(df["ts"]):
        df["ts"] = pd.to_datetime(df["ts"], utc=True, errors="coerce")
    df["ts"] = df["ts"].apply(lambda x: x.strftime("%Y-%m-%d %H:%M:%S%z") if isinstance(x, pd.Timestamp) else str(x))
    for b in ("feeds_fresh","run_quality_pass"):
        if b in df.columns:
            df[b] = _coerce_bool_series(df[b])
        else:
            df[b] = False
    for c in ("y_10m","y_30m","block"):
        if c in df.columns:
            def _coerce_int(v):
                if _is_na(v): return None
                try:
                    iv = int(float(v))
                except Exception:
                    return None
                if c in ("y_10m","y_30m"):
                    return 1 if iv >= 1 else 0
                return iv
            df[c] = df[c].apply(_coerce_int)
    return df
def _validate_rows_only(rows: pd.DataFrame) -> pd.DataFrame:
    rows = _coerce_df_min(rows)
    out = []
    for i, r in rows.iterrows():
        try:
            m = LiveRowModel(**r.to_dict())  
            out.append(m.model_dump())
        except Exception as e:
            raise ValueError(f"[append validation] bad row at index {i}: {e}")
    return pd.DataFrame(out)

In [166]:
def append_live_validated_rows_only(rows: pd.DataFrame) -> None:
    cleaned = _validate_rows_only(rows)
    base = load_live()
    merged = pd.concat([base, cleaned], ignore_index=True)
    write_live(merged)
    print(f"[append+validate] appended {len(cleaned)} rows")

In [167]:
def migrate_existing_live_to_valid() -> None:
    base = load_live()
    if base.empty:
        print("[migrate] no existing rows; nothing to do.")
        return
    base = _coerce_df_min(base)
    for c, default in [
        ("dev_roll_std", 0.0),
        ("virtual_price", None),
        ("dex_twap", None),
        ("oracle_ratio", None),
        ("spot_twap_gap_bps", None),
        ("r0", None), ("r1", None),
        ("y_10m", None), ("y_30m", None),
        ("block", None),
    ]:
        if c in base.columns:
            if default is None:
                base[c] = base[c].apply(lambda v: None if _is_na(v) else v)
            else:
                base[c] = base[c].apply(lambda v: default if _is_na(v) else v)
    cleaned = _validate_rows_only(base)
    write_live(cleaned)
    print(f"[migrate] repaired & wrote {len(cleaned)} rows → {LIVE_CSV}")

In [168]:
migrate_existing_live_to_valid()
try:
    _append_live_orig
except NameError:
    _append_live_orig = append_live
def append_live(df):
    return append_live_validated_rows_only(df)
print("[patch] append_live → append_live_validated_rows_only")

[migrate] repaired & wrote 2255 rows → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\live_dataset.csv
[patch] append_live → append_live_validated_rows_only


In [169]:
from pathlib import Path
import json
import pandas as pd
import numpy as np

In [170]:
def enforce_live_dtypes(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    for b in ("feeds_fresh","run_quality_pass"):
        if b in df.columns:
            df[b] = pd.Series(df[b]).astype("boolean").fillna(False)
    for c in ("y_10m","y_30m","block"):
        if c in df.columns:
            s = pd.to_numeric(df[c], errors="coerce")
            if c in ("y_10m","y_30m"):
                s = s.clip(0,1)  
                s = s.fillna(0).astype("Int64")
            else:
                s = s.round().astype("Int64")
            df[c] = s
    float_cols = [
        "dex_spot","oracle_px","dev","dex_twap","oracle_ratio","dev_roll_std","tvl_outflow_rate",
        "virtual_price","spot_twap_gap_bps","r0","r1","r0_delta","r1_delta",
        "neighbor_max_dev","neighbor_avg_anom","corr_best",
        "z_if","z_lof","z_ocsvm","z_cusum","z_ae","z_ae_seq",
        "anom_fused","risk_forecast_10m","risk_forecast_30m"
    ]
    for c in float_cols:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")
    return df

In [171]:
def ensure_labels_fixed_on_live(
    dev_thr: float = 0.005,
    fused_thr: float = 0.90,
    h10: int = 10,
    h30: int = 30,
    persist_version: bool = True,
    fill_only: bool = False,  
):
    df = load_live()
    if df.empty:
        print("[labels] live empty; nothing to do.")
        return df
    df = df.sort_values(["pool","ts"]).reset_index(drop=True)
    has_fused = "anom_fused" in df.columns
    def _label_group(g: pd.DataFrame, horizon: int) -> pd.Series:
        dev_cond   = g["dev"].abs().fillna(0).ge(dev_thr)
        fused_cond = g["anom_fused"].fillna(0).ge(fused_thr) if has_fused else pd.Series(False, index=g.index)
        cond = (dev_cond | fused_cond).astype(bool)
        any_future = pd.Series(False, index=g.index)
        for j in range(1, horizon + 1):
            any_future = any_future | cond.shift(-j, fill_value=False)
        return any_future.astype("Int64")
    y10 = df.groupby("pool", sort=False, group_keys=False).apply(lambda g: _label_group(g, h10))
    y30 = df.groupby("pool", sort=False, group_keys=False).apply(lambda g: _label_group(g, h30))
    if fill_only:
        if "y_10m" not in df.columns:
            df["y_10m"] = pd.Series([pd.NA]*len(df), dtype="Int64")
        if "y_30m" not in df.columns:
            df["y_30m"] = pd.Series([pd.NA]*len(df), dtype="Int64")
        m10 = df["y_10m"].isna()
        m30 = df["y_30m"].isna()
        df.loc[m10, "y_10m"] = y10.loc[m10]
        df.loc[m30, "y_30m"] = y30.loc[m30]
    else:
        df["y_10m"] = y10
        df["y_30m"] = y30
    df = enforce_live_dtypes(df)
    write_live(df)
    print(f"[labels] wrote y_10m/y_30m with dev_thr={dev_thr:.4f}, fused_thr={fused_thr:.2f}, h10={h10}, h30={h30}")
    if persist_version:
        try:
            ver = {
                "ts": _now_iso(),
                "labeling": {
                    "y_10m": {"dev_thr": float(dev_thr), "fused_thr": float(fused_thr), "horizon": int(h10)},
                    "y_30m": {"dev_thr": float(dev_thr), "fused_thr": float(fused_thr), "horizon": int(h30)},
                }
            }
            (OUT_MODEL / "version.json").write_text(json.dumps(ver, indent=2))
            print(f"[labels] persisted thresholds → {OUT_MODEL / 'version.json'}")
        except Exception as e:
            print("[labels] could not persist version.json:", e)
    return df

In [172]:
feat = sample_once_parallel() if 'sample_once_parallel' in globals() else sample_once()
refresh_true_twap_and_gap(window_s=300)
stamp_feeds_fresh_on_live(heartbeat_s=120, max_block_lag_s=120)
ensure_labels_fixed_on_live(dev_thr=0.005, fused_thr=0.90, h10=10, h30=30, persist_version=True)

  merged = pd.concat([base, cleaned], ignore_index=True)


[append+validate] appended 3 rows


  idx_live.update(idx_upd)
  idx_live.update(idx_upd)
  idx_live.update(idx_upd)
  idx_live.update(idx_upd)
  y10 = df.groupby("pool", sort=False, group_keys=False).apply(lambda g: _label_group(g, h10))
  y30 = df.groupby("pool", sort=False, group_keys=False).apply(lambda g: _label_group(g, h30))


[labels] wrote y_10m/y_30m with dev_thr=0.0050, fused_thr=0.90, h10=10, h30=30
[labels] persisted thresholds → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\model\version.json


Unnamed: 0,ts,pool,dex_spot,dex_twap,oracle_ratio,dev,dev_roll_std,tvl_outflow_rate,virtual_price,spot_twap_gap_bps,...,z_cusum,z_ae,anom_fused,y_10m,y_30m,neighbor_max_dev,neighbor_avg_anom,lead_lag_best,corr_best,risk_forecast_10m
0,2025-08-26 14:57:08+00:00,2025-08-23 22:38:02+00:00,0.999490,0.999490,0.999477,-0.000523,0.000000,,0.999486,-1.110789e-12,...,0.0,0.001043,0.100257,1,1,,,,,
1,2025-08-26 14:57:23+00:00,2025-08-23 22:38:02+00:00,0.999401,0.999472,0.999370,-0.000630,0.000000,,0.999483,-7.115763e-01,...,0.0,0.001192,0.111186,1,1,,,,,
2,2025-08-26 14:57:38+00:00,2025-08-23 22:38:02+00:00,0.999519,0.999482,0.999466,-0.000534,0.000000,,0.999454,3.738850e-01,...,0.0,0.001013,0.105200,1,1,,,,,
3,2025-08-26 14:57:53+00:00,2025-08-23 22:38:02+00:00,0.999417,0.999469,0.999356,-0.000644,0.000000,,0.999456,-5.219837e-01,...,0.0,0.001149,0.111402,1,1,,,,,
4,2025-08-26 14:58:08+00:00,2025-08-23 22:38:02+00:00,0.999303,0.999436,0.999234,-0.000766,0.000098,,0.999464,-1.323117e+00,...,1.0,0.000992,1.000000,1,1,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2253,2025-08-28 21:00:53+00:00,USDC/USDT_univ3,1.000051,1.000022,,,0.000000,-0.001644,,2.977360e-01,...,1.0,0.003168,1.000000,1,1,0.004702,0.980871,8.0,0.432038,0.542200
2254,2025-08-28 21:01:39+00:00,USDC/USDT_univ3,0.999471,0.999912,,,0.000000,0.001967,,-4.404776e+00,...,1.0,0.002050,1.000000,1,1,,,,,0.819711
2255,2025-08-28 21:02:23+00:00,USDC/USDT_univ3,1.000072,0.999944,,,0.000000,0.001551,,1.286553e+00,...,1.0,0.000079,1.000000,1,1,0.004702,0.968146,8.0,0.432541,0.830525
2256,2025-08-28 21:03:53+00:00,USDC/USDT_univ3,0.999976,0.999950,,,0.000000,0.000926,,2.558185e-01,...,1.0,0.000057,1.000000,0,0,,,,,


In [173]:
from pathlib import Path
import json, math
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
_SK = globals().get("_SK_OK", True)
try:
    from sklearn.metrics import average_precision_score as AP
except Exception:
    _SK = False

In [174]:
ART_DIR = (OUT / "artifacts")
ART_DIR.mkdir(parents=True, exist_ok=True)
CAL_10M_JSON = ART_DIR / "calibration_10m.json"
CAL_10M_PNG  = ART_DIR / "calibration_10m.png"
CAL_30M_JSON = ART_DIR / "calibration_30m.json"
CAL_30M_PNG  = ART_DIR / "calibration_30m.png"
DET_AP_JSON  = ART_DIR / "detector_pr_auc.json"
DET_AP_PNG   = ART_DIR / "detector_pr_auc.png"
EXPLAIN_30M_JSON = OUT / "explain_30m.json"
DRIFT_JSON  = ART_DIR / "feature_drift.json"

In [175]:
def _safe_np(a):
    return np.asarray(a, dtype=float)
def _ensure_labels_present(df: pd.DataFrame):
    if df.empty:
        return df
    if "y_10m" not in df.columns or df["y_10m"].nunique(dropna=True) < 2:
        if "ensure_labels_fixed_on_live" in globals():
            ensure_labels_fixed_on_live()
        else:
            df["y_10m"] = _ensure_labels(df)
            write_live(df)
    if "y_30m" not in df.columns or df["y_30m"].isna().all():
        if "ensure_labels_fixed_on_live" in globals():
            ensure_labels_fixed_on_live(fill_only=False)
        else:
            df["y_30m"] = label_targets(
                df, horizon=30, dev_thr=0.005,
                fused_col="anom_fused" if "anom_fused" in df.columns else None
            ).astype(int)
            write_live(df)

In [176]:
def _calibration_bins(probs: np.ndarray, labels: np.ndarray, nbins: int = 10):
    p = _safe_np(probs)
    y = _safe_np(labels)
    if p.size == 0 or y.size == 0:
        return pd.DataFrame(columns=["bin","p_mean","y_mean","n"])
    p = np.clip(p, 0.0, 1.0)
    bins = np.linspace(0, 1, nbins+1)
    idx = np.digitize(p, bins, right=False)  
    idx[idx==nbins+1] = nbins
    df = pd.DataFrame({"p": p, "y": y, "bin": idx})
    out = df.groupby("bin").agg(p_mean=("p","mean"), y_mean=("y","mean"), n=("y","size")).reset_index()
    return out

In [177]:
def _save_calibration_artifacts(horizon: int = 10, nbins: int = 10):
    live = load_live()
    if live.empty:
        return None
    if horizon == 10:
        path = globals().get("FORECAST_10M_PARQUET", OUT / "forecast_10m.parquet")
        col  = "risk_forecast_10m"
        ycol = "y_10m"
        scorer = score_latest_10m
    else:
        path = globals().get("FORECAST_30M_PARQUET", OUT / "forecast_30m.parquet")
        col  = "risk_forecast_30m"
        ycol = "y_30m"
        scorer = score_latest_30m if "score_latest_30m" in globals() else None
    _ensure_labels_present(live)
    if Path(path).exists():
        f = pd.read_parquet(path)
    else:
        if scorer is None:
            return None
        f = scorer(n_tail=200, write_parquet=False)
    use = f.merge(live[["ts","pool",ycol]], on=["ts","pool"], how="left")
    use = use.dropna(subset=[col])
    if ycol not in use.columns:
        return None
    y = pd.to_numeric(use[ycol], errors="coerce").fillna(0).astype(int).to_numpy()
    p = pd.to_numeric(use[col], errors="coerce").fillna(0.0).to_numpy()
    if (y.sum() == 0) or (np.unique(y).size < 2):
        calib = pd.DataFrame(columns=["bin","p_mean","y_mean","n"])
    else:
        calib = _calibration_bins(p, y, nbins=nbins)
    obj = {
        "horizon_min": horizon,
        "bins": calib.to_dict(orient="records"),
        "counts": {"n": int(len(use)), "positives": int(y.sum())}
    }
    jpath = CAL_10M_JSON if horizon==10 else CAL_30M_JSON
    Path(jpath).write_text(json.dumps(obj, indent=2))
    png = CAL_10M_PNG if horizon==10 else CAL_30M_PNG
    plt.figure(figsize=(6,5))
    xx = np.linspace(0,1,100)
    plt.plot(xx, xx, linestyle="--")
    if not calib.empty:
        plt.scatter(calib["p_mean"], calib["y_mean"], s=30)
        for _, r in calib.iterrows():
            plt.annotate(str(int(r["n"])), (r["p_mean"], r["y_mean"]), xytext=(4,4), textcoords="offset points", fontsize=8)
    plt.xlim(0,1); plt.ylim(0,1)
    plt.xlabel("Predicted probability")
    plt.ylabel("Observed frequency")
    plt.title(f"Calibration ({horizon}m) — n={len(use)}, pos={y.sum()}")
    plt.tight_layout()
    plt.savefig(png, dpi=140)
    plt.close()
    return {"json": str(jpath), "png": str(png)}

In [178]:
def save_all_calibration_artifacts():
    out = {"h10": _save_calibration_artifacts(10), "h30": _save_calibration_artifacts(30)}
    return out

In [179]:
def save_detector_pr_auc(thr_abs_dev: float = 0.003):
    df = load_live()
    if df.empty: 
        return None
    y = (df["dev"].abs() >= float(thr_abs_dev)).astype(int).values
    cols = [c for c in ["z_if","z_lof","z_ocsvm","z_cusum","z_ae","z_ae_seq","anom_fused"] if c in df.columns]
    scores = {}
    if _SK and y.sum() > 0 and y.sum() < len(y):
        for c in cols:
            s = pd.to_numeric(df[c], errors="coerce").fillna(0.0).values
            try:
                scores[c] = float(AP(y, s))
            except Exception:
                scores[c] = float("nan")
    else:
        for c in cols:
            scores[c] = float("nan")
    winner = None
    if scores:
        winner = max(scores.items(), key=lambda kv: (kv[1] if not math.isnan(kv[1]) else -1, kv[0]=="anom_fused"))[0]
    payload = {"threshold_abs_dev": thr_abs_dev, "ap": scores, "winner": winner}
    DET_AP_JSON.write_text(json.dumps(payload, indent=2))
    plt.figure(figsize=(7,4))
    keys = list(scores.keys())
    vals = [scores[k] if not (scores[k] is None or math.isnan(scores[k])) else 0.0 for k in keys]
    plt.bar(range(len(keys)), vals)
    plt.xticks(range(len(keys)), keys, rotation=20, ha="right")
    plt.ylim(0, 1)
    plt.ylabel("PR-AUC")
    plt.title("Detector PR-AUC (|dev| ≥ {:.2f}%)".format(thr_abs_dev*100))
    plt.tight_layout()
    plt.savefig(DET_AP_PNG, dpi=140)
    plt.close()
    return {"json": str(DET_AP_JSON), "png": str(DET_AP_PNG), "winner": winner, "scores": scores}

In [180]:
def explain_forecast_30m(feature_cols: list | None = None, n_repeats: int = 8) -> dict:
    """Permutation-importance for the 30m forecaster; mirrors your 10m explain."""
    if not globals().get("_SK_OK", True):
        payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
        EXPLAIN_30M_JSON.write_text(json.dumps(payload, indent=2))
        return payload
    df = load_live()
    if df.empty:
        raise ValueError("live_dataset is empty.")
    _ensure_labels_present(df)
    if feature_cols is None:
        feature_cols = [
            "dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps",
            "oracle_ratio","anom_fused","r0_delta","r1_delta",
            "event_severity_max_24h","event_count_24h"
        ]
    use_cols = [c for c in feature_cols if c in df.columns]
    X = df[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    y = pd.to_numeric(df["y_30m"], errors="coerce").fillna(0).astype(int).to_numpy()
    tr_idx, te_idx = _time_split_idx(df["ts"], 0.70)
    if te_idx.size == 0:
        payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
        EXPLAIN_30M_JSON.write_text(json.dumps(payload, indent=2))
        return payload
    Xte, yte = X[te_idx], y[te_idx]
    clf, calib = _load_forecaster_30m() if "._load_forecaster_30m" in str(globals().keys()) or "_load_forecaster_30m" in globals() else (None, None)
    if clf is None and calib is None and "train_forecaster_30m" in globals():
        _ = train_forecaster_30m(feature_cols=use_cols)
        clf, calib = _load_forecaster_30m()
    model = calib if calib is not None else clf
    if model is None or len(np.unique(yte)) < 2:
        payload = {"ts": _now_iso(), "top_contributors": [], "all_features": []}
        EXPLAIN_30M_JSON.write_text(json.dumps(payload, indent=2))
        return payload
    from sklearn.inspection import permutation_importance
    r = permutation_importance(model, Xte, yte, n_repeats=n_repeats, scoring="average_precision", random_state=42)
    imp = sorted(zip(use_cols, r.importances_mean, r.importances_std), key=lambda z: z[1], reverse=True)
    top3 = [f"{name} (+{mean:.4f}±{std:.4f} AP)" for name, mean, std in imp[:3]]
    payload = {"ts": _now_iso(), "top_contributors": top3,
               "all_features": [{"feature": n, "mean": float(m), "std": float(s)} for n,m,s in imp]}
    EXPLAIN_30M_JSON.write_text(json.dumps(payload, indent=2))
    return payload

In [181]:
def _empirical_ks(xa: np.ndarray, xb: np.ndarray) -> float:
    """Minimal KS distance; no scipy dependency."""
    xa = xa[~np.isnan(xa)]; xb = xb[~np.isnan(xb)]
    if xa.size < 5 or xb.size < 5:
        return 0.0
    xs = np.sort(np.unique(np.concatenate([xa, xb])))
    if xs.size == 0:
        return 0.0
    def _cdf(x, v):
        return (x <= v).sum() / x.size
    ks = 0.0
    for v in xs:
        ks = max(ks, abs(_cdf(xa, v) - _cdf(xb, v)))
    return float(ks)
def _psi(base: np.ndarray, cur: np.ndarray, nbins: int = 10) -> float:
    base = base[~np.isnan(base)]; cur = cur[~np.isnan(cur)]
    if base.size < 10 or cur.size < 10:
        return 0.0
    qs = np.quantile(base, np.linspace(0,1,nbins+1))
    qs[0] -= 1e-9; qs[-1] += 1e-9 
    b = np.histogram(base, bins=qs)[0]; c = np.histogram(cur, bins=qs)[0]
    b = b / max(b.sum(), 1); c = c / max(c.sum(), 1)
    b = np.where(b==0, 1e-6, b)
    c = np.where(c==0, 1e-6, c)
    return float(np.sum((c - b) * np.log(c / b)))

In [182]:
def compute_feature_drift(
    features: list[str] | None = None,
    ks_thr: float = 0.20,
    psi_thr: float = 0.25
) -> dict:
    """
    Compare train(70%) vs recent(30%) distributions for selected features.
    Returns a JSON-serializable dict with per-feature KS and PSI and a boolean 'drift'.
    """
    df = load_live()
    if df.empty:
        return {"drift": False, "reason": "no data", "metrics": {}}
    if features is None:
        features = [
            "dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps",
            "oracle_ratio","anom_fused"
        ]
        features = [c for c in features if c in df.columns]
    tr_idx, te_idx = _time_split_idx(df["ts"], 0.70)
    base = df.iloc[tr_idx, :].copy()
    cur  = df.iloc[te_idx, :].copy()
    metrics = {}
    alert = False
    for c in features:
        a = pd.to_numeric(base[c], errors="coerce").values
        b = pd.to_numeric(cur[c],  errors="coerce").values
        ks  = _empirical_ks(a, b)
        psi = _psi(a, b)
        metrics[c] = {"ks": float(ks), "psi": float(psi)}
        if (ks >= ks_thr) or (psi >= psi_thr):
            alert = True
    payload = {"drift": bool(alert), "reason": ("feature drift" if alert else "ok"),
               "thresholds": {"ks": ks_thr, "psi": psi_thr},
               "metrics": metrics, "n_train": int(len(base)), "n_recent": int(len(cur))}
    DRIFT_JSON.write_text(json.dumps(payload, indent=2))
    return payload
def retrain_check() -> dict:
    """
    Returns {should_retrain, reason, drift?}
    - True if feature drift exceeds threshold, or if scheduled nightly condition is hit.
    """
    drift = compute_feature_drift()
    if drift.get("drift", False):
        return {"should_retrain": True, "reason": "feature drift", "drift": drift}
    return {"should_retrain": True, "reason": "scheduled nightly or drift threshold (PR-AUC drop) hit", "drift": drift}

In [183]:
def nightly_report_v2() -> dict:
    """
    Nightly Markdown report:
      - winner detector by PR-AUC + artifact saved
      - forecast calibration (10m & 30m) + artifacts saved
      - incidents summary (as before)
    Also exports to PDF.
    """
    df = load_live()
    if df.empty:
        return {"ok": False, "markdown": "[report] no data"}
    det = save_detector_pr_auc(thr_abs_dev=0.003)
    winner = (det or {}).get("winner", "anom_fused")
    cal_art = save_all_calibration_artifacts()
    try:
        recent = df.sort_values("ts").groupby("pool").tail(min(100, len(df)))
        dec = decide_latest(n_tail=min(100, len(recent))) if "decide_latest" in globals() else pd.DataFrame()
        reds = dec[dec["level"] == "red"] if isinstance(dec, pd.DataFrame) else pd.DataFrame()
    except Exception:
        reds = pd.DataFrame()
    md = []
    md.append("# Nightly Model Report (v2)")
    md.append(f"Generated: {_now_iso()}\n")
    md.append("## Winner Detector Today")
    if det:
        md.append(f"- **Winner:** `{det.get('winner')}`  (PR-AUC: {det['scores'].get(det.get('winner'), float('nan')):.3f})")
        md.append(f"- JSON: `{DET_AP_JSON.name}`  PNG: `{DET_AP_PNG.name}`")
    else:
        md.append("- (no scores)")
    md.append("\n## Forecast Calibration")
    if cal_art and cal_art.get("h10"):
        md.append(f"- **10m** → JSON: `{Path(cal_art['h10']['json']).name}`  PNG: `{Path(cal_art['h10']['png']).name}`")
    if cal_art and cal_art.get("h30"):
        md.append(f"- **30m** → JSON: `{Path(cal_art['h30']['json']).name}`  PNG: `{Path(cal_art['h30']['png']).name}`")
    drift = compute_feature_drift()
    md.append("\n## Feature Drift")
    md.append(f"- drift: **{drift.get('drift')}**  reason: {drift.get('reason')}")
    md.append(f"- JSON: `{DRIFT_JSON.name}`")
    md.append("\n## Incidents (last window)")
    if isinstance(reds, pd.DataFrame) and not reds.empty:
        for _, r in reds.iterrows():
            an = r.get("anom_fused", np.nan)
            p10 = r.get("risk_forecast_10m", np.nan)
            md.append(f"- {r['ts']} | {r['pool']} | fused={an:.2f} | p10={p10:.2f}")
    else:
        md.append("- None")
    md_text = "\n".join(md)
    pdf_path = export_markdown_pdf(md_text, OUT / "report.pdf", title="Depeg Sentinel — Nightly Report v2")
    return {"ok": True, "markdown": md_text[:800] + "...", "pdf": str(pdf_path),
            "artifacts": {"detector_pr_auc": {"json": str(DET_AP_JSON), "png": str(DET_AP_PNG)},
                          "calibration_10m": cal_art.get("h10") if cal_art else None,
                          "calibration_30m": cal_art.get("h30") if cal_art else None},
            "drift": drift}

In [184]:
nightly_report = nightly_report_v2 

In [185]:
res_report = nightly_report()
print(res_report)
exp30 = explain_forecast_30m(n_repeats=8)
print(EXPLAIN_30M_JSON, exp30.get("top_contributors"))
drift = compute_feature_drift()
print(drift)
print(retrain_check())

[ok] Markdown PDF exported → C:\Users\aniru\OneDrive\Desktop\ML tutorial\DEFI Depeg sentinel\report.pdf
{'ok': True, 'markdown': '# Nightly Model Report (v2)\nGenerated: 2025-08-28T21:07:34+00:00\n\n## Winner Detector Today\n- **Winner:** `z_if`  (PR-AUC: 0.704)\n- JSON: `detector_pr_auc.json`  PNG: `detector_pr_auc.png`\n\n## Forecast Calibration\n- **10m** → JSON: `calibration_10m.json`  PNG: `calibration_10m.png`\n- **30m** → JSON: `calibration_30m.json`  PNG: `calibration_30m.png`\n\n## Feature Drift\n- drift: **True**  reason: feature drift\n- JSON: `feature_drift.json`\n\n## Incidents (last window)\n- None...', 'pdf': 'C:\\Users\\aniru\\OneDrive\\Desktop\\ML tutorial\\DEFI Depeg sentinel\\report.pdf', 'artifacts': {'detector_pr_auc': {'json': 'C:\\Users\\aniru\\OneDrive\\Desktop\\ML tutorial\\DEFI Depeg sentinel\\artifacts\\detector_pr_auc.json', 'png': 'C:\\Users\\aniru\\OneDrive\\Desktop\\ML tutorial\\DEFI Depeg sentinel\\artifacts\\detector_pr_auc.png'}, 'calibration_10m': {'j

In [186]:
try:
    CANON_COLS
except NameError:
    CANON_COLS = []
for _c in ["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]:
    if _c not in CANON_COLS:
        CANON_COLS.append(_c)
import numpy as np, pandas as pd
from collections import deque

In [187]:
_LAST_NET_WIN = globals().get("_LAST_NET_WIN", 60)
_LAST_MAX_LAG = globals().get("_LAST_MAX_LAG", 10)
_hist_dev  = globals().get("_hist_dev", {})
_hist_anom = globals().get("_hist_anom", {})

In [188]:
def _push_hist(d: dict, key: str, val: float, win: int = 60):
    q = d.get(key)
    if q is None:
        q = deque(maxlen=win)
        d[key] = q
    q.append(float(val) if pd.notna(val) else 0.0)
def _rolling_corr(x: np.ndarray, y: np.ndarray) -> float:
    if len(x) < 3 or len(y) < 3 or len(x) != len(y):
        return np.nan
    x = np.asarray(x); y = np.asarray(y)
    if x.std() == 0 or y.std() == 0:
        return 0.0
    return float(np.corrcoef(x, y)[0,1])

In [189]:
def _cross_corr_lag(x: np.ndarray, y: np.ndarray, max_lag: int = 10) -> tuple[float,int]:
    if len(x) < 5 or len(y) < 5:
        return np.nan, 0
    x = np.asarray(x); y = np.asarray(y)
    best, best_lag = -2.0, 0
    for lag in range(-max_lag, max_lag+1):
        if lag < 0:
            xs, ys = x[:lag], y[-lag:]
        elif lag > 0:
            xs, ys = x[lag:], y[:-lag]
        else:
            xs, ys = x, y
        if len(xs) < 5:
            continue
        c = _rolling_corr(xs, ys)
        if np.isnan(c):
            continue
        if c > best:
            best, best_lag = c, lag
    return float(best), int(best_lag)

In [190]:
def compute_network_features(win: int = 60, max_lag: int = 10) -> pd.DataFrame:
    """
    Compute neighbor_max_dev / neighbor_avg_anom and best lead/lag correlation per pool.
    - Remembers `win` and `max_lag` in globals for audit strings.
    - Updates last rows in live CSV.
    """
    global _LAST_NET_WIN, _LAST_MAX_LAG
    _LAST_NET_WIN = int(win)
    _LAST_MAX_LAG = int(max_lag)
    df = load_live().copy()
    if df.empty:
        print("[network] live is empty; skipping")
        return pd.DataFrame(columns=["ts","pool","neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"])
    df = df.sort_values("ts")
    for _, r in df.iterrows():
        _push_hist(_hist_dev,  str(r.get("pool")), float(r.get("dev", 0.0)), win)
        _push_hist(_hist_anom, str(r.get("pool")), float(r.get("anom_fused", 0.0)), win)
    pools = list(df["pool"].dropna().unique())
    rows = []
    for p in pools:
        dev_p = list(_hist_dev.get(p, []))
        neigh = [q for q in pools if q != p]
        neigh_max_dev  = 0.0
        neigh_avg_anom = 0.0
        lead_lag_best  = 0
        corr_best      = 0.0
        used = 0
        for q in neigh:
            dev_q = list(_hist_dev.get(q, []))
            an_q  = list(_hist_anom.get(q, []))
            if dev_q:
                neigh_max_dev = max(neigh_max_dev, float(np.nanmax(np.abs(dev_q))))
            if an_q:
                neigh_avg_anom += float(np.nanmean(an_q)); used += 1
            if dev_p and dev_q:
                c, lag = _cross_corr_lag(np.array(dev_p), np.array(dev_q), max_lag=max_lag)
                if np.isfinite(c) and abs(c) > abs(corr_best):
                    corr_best, lead_lag_best = float(c), int(lag)
        if used > 0:
            neigh_avg_anom /= used
        rows.append({
            "pool": p,
            "neighbor_max_dev": float(neigh_max_dev),
            "neighbor_avg_anom": float(neigh_avg_anom),
            "lead_lag_best": int(lead_lag_best),
            "corr_best": float(corr_best),
        })
    latest = df.groupby("pool", as_index=False).tail(1).copy()
    net = pd.DataFrame(rows)
    latest = latest.drop(columns=["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"], errors="ignore")
    latest = latest.merge(net, on="pool", how="left")
    live = load_live()
    live["ts_str"]   = _iso_str(live["ts"])
    latest["ts_str"] = _iso_str(latest["ts"])
    live_idx = live.set_index(["pool","ts_str"])
    upd_idx  = latest.set_index(["pool","ts_str"])[["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]]
    live_idx.update(upd_idx)
    live_updated = live_idx.reset_index().drop(columns=["ts_str"])
    write_live(live_updated)
    print(f"[network] updated (win={win}, max_lag={max_lag})")
    return latest[["ts","pool","neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]].sort_values("pool")

In [191]:
from typing import List, Sequence

In [192]:
_NET_FEATS = ["neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best"]
def _default_feats_tabular() -> List[str]:
    return [
        "dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio",
        "anom_fused","r0_delta","r1_delta","event_severity_max_24h","event_count_24h",
        *_NET_FEATS,
    ]

In [193]:
def train_forecaster_10m(feature_cols: List[str] | None = None, label_col: str = "y_10m"):
    if not globals().get("_XGB_OK", False) or not globals().get("_SK_OK", False):
        raise RuntimeError("xgboost + scikit-learn required to train 10m forecaster.")
    try:
        compute_network_features()
    except Exception:
        pass
    df = load_live().copy()
    if df.empty:
        raise ValueError("live_dataset is empty. sample more rows first.")
    df = ensure_targets_all(df) if "ensure_targets_all" in globals() else df
    if feature_cols is None:
        feature_cols = _default_feats_tabular()
    use_cols = [c for c in feature_cols if c in df.columns]
    X = df[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    y = pd.to_numeric(df[label_col], errors="coerce").fillna(0).astype(int).to_numpy()
    ts = df["ts"]; tr_idx, te_idx = _time_split_idx(ts, 0.70)
    Xtr, Xte = X[tr_idx], X[te_idx]; ytr, yte = y[tr_idx], y[te_idx]
    clf = xgb.XGBClassifier(
        n_estimators=300, max_depth=4, learning_rate=0.06,
        subsample=0.9, colsample_bytree=0.8,
        objective="binary:logistic", eval_metric="logloss",
        random_state=SEED, n_jobs=0
    )
    clf.fit(Xtr, ytr)
    calib = None
    unique, counts = np.unique(ytr, return_counts=True)
    min_cls = min(counts) if len(counts)==2 else 0
    if min_cls >= 3:
        try:
            calib = CalibratedClassifierCV(clf, method="isotonic", cv=3).fit(Xtr, ytr)
        except Exception:
            calib = None
    if calib is None and min_cls >= 2:
        try:
            calib = CalibratedClassifierCV(clf, method="sigmoid", cv=2).fit(Xtr, ytr)
        except Exception:
            calib = None
    if len(np.unique(yte))>1:
        try:
            ap = average_precision_score(yte, (calib or clf).predict_proba(Xte)[:,1])
            bs = float(brier_score_loss(yte, (calib or clf).predict_proba(Xte)[:,1]))
        except Exception:
            ap, bs = float("nan"), float("nan")
    else:
        ap, bs = float("nan"), float("nan")
    print(f"[forecast10m] AP={ap if pd.notna(ap) else float('nan'):.3f}  Brier={bs if pd.notna(bs) else float('nan'):.3f}  (with network feats)")
    dump(clf, OUT_MODEL / "forecast_10m_xgb.joblib")
    if calib: dump(calib, OUT_MODEL / "forecast_10m_calib.joblib")
    elif (OUT_MODEL / "forecast_10m_calib.joblib").exists():
        (OUT_MODEL / "forecast_10m_calib.joblib").unlink()
    tail = df.tail(6).copy()
    X_tail = tail[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    tail["risk_forecast_10m"] = (calib or clf).predict_proba(X_tail)[:,1]
    return tail[["ts","pool","anom_fused","risk_forecast_10m"]]

In [194]:
def score_latest_10m(n_tail: int = 60, feature_cols: Sequence[str] | None = None, write_parquet: bool = True) -> pd.DataFrame:
    df = load_live()
    if df.empty:
        raise ValueError("live_dataset is empty. Run sample_once() first.")
    try:
        compute_network_features()
    except Exception:
        pass
    if feature_cols is None:
        feature_cols = _default_feats_tabular()
    use_cols = [c for c in feature_cols if c in df.columns]
    X_tail = df.tail(n_tail)[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    clf, calib = _load_forecaster()  
    if clf is None and calib is None:
        _ = train_forecaster_10m(feature_cols=list(use_cols))
        clf, calib = _load_forecaster()
    model = calib if calib is not None else clf
    p = model.predict_proba(X_tail)[:,1]
    out = df.tail(n_tail).copy(); out["risk_forecast_10m"] = p
    out = out[["ts","pool","anom_fused","risk_forecast_10m"]]
    if write_parquet:
        try:
            out.to_parquet(FORECAST_10M_PARQUET, index=False); print(f"[forecast10m] wrote {FORECAST_10M_PARQUET}")
        except Exception as e:
            print(f"[warn] parquet write failed: {e}")
    return out.tail(10)

In [195]:
def train_forecaster_30m(feature_cols: List[str] | None = None, label_col: str = "y_30m"):
    if not globals().get("_XGB_OK", False) or not globals().get("_SK_OK", False):
        raise RuntimeError("xgboost + scikit-learn required to train 30m forecaster.")
    try:
        compute_network_features()
    except Exception:
        pass
    df = load_live().copy()
    if df.empty:
        raise ValueError("live_dataset is empty. sample more rows first.")
    df = ensure_targets_all(df) if "ensure_targets_all" in globals() else df
    if feature_cols is None:
        feature_cols = _default_feats_tabular()
    use_cols = [c for c in feature_cols if c in df.columns]
    X = df[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    y = pd.to_numeric(df[label_col], errors="coerce").fillna(0).astype(int).to_numpy()
    ts = df["ts"]; tr_idx, te_idx = _time_split_idx(ts, 0.70)
    Xtr, Xte = X[tr_idx], X[te_idx]; ytr, yte = y[tr_idx], y[te_idx]
    clf = xgb.XGBClassifier(
        n_estimators=300, max_depth=4, learning_rate=0.06,
        subsample=0.9, colsample_bytree=0.8,
        objective="binary:logistic", eval_metric="logloss",
        random_state=SEED, n_jobs=0
    )
    clf.fit(Xtr, ytr)
    calib = None
    unique, counts = np.unique(ytr, return_counts=True)
    min_cls = min(counts) if len(counts)==2 else 0
    if min_cls >= 3:
        try:
            calib = CalibratedClassifierCV(clf, method="isotonic", cv=3).fit(Xtr, ytr)
        except Exception:
            calib = None
    if calib is None and min_cls >= 2:
        try:
            calib = CalibratedClassifierCV(clf, method="sigmoid", cv=2).fit(Xtr, ytr)
        except Exception:
            calib = None
    if len(np.unique(yte))>1:
        try:
            ap = average_precision_score(yte, (calib or clf).predict_proba(Xte)[:,1])
            bs = float(brier_score_loss(yte, (calib or clf).predict_proba(Xte)[:,1]))
        except Exception:
            ap, bs = float("nan"), float("nan")
    else:
        ap, bs = float("nan"), float("nan")
    print(f"[forecast30m] AP={ap if pd.notna(ap) else float('nan'):.3f}  Brier={bs if pd.notna(bs) else float('nan'):.3f}  (with network feats)")
    dump(clf, FORECAST_30M_PATH)
    if calib: dump(calib, CALIB_30M_PATH)
    elif CALIB_30M_PATH.exists():
        CALIB_30M_PATH.unlink()
    tail = df.tail(6).copy()
    X_tail = tail[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    tail["risk_forecast_30m"] = (calib or clf).predict_proba(X_tail)[:,1]
    return tail[["ts","pool","anom_fused","risk_forecast_30m"]]

In [196]:
def score_latest_30m(n_tail: int = 60, feature_cols: Sequence[str] | None = None, write_parquet: bool = True) -> pd.DataFrame:
    df = load_live()
    if df.empty:
        raise ValueError("live_dataset is empty. Run sample_once() first.")
    try:
        compute_network_features()
    except Exception:
        pass
    if feature_cols is None:
        feature_cols = _default_feats_tabular()
    use_cols = [c for c in feature_cols if c in df.columns]
    X_tail = df.tail(n_tail)[use_cols].replace([np.inf,-np.inf], np.nan).fillna(0.0).to_numpy()
    clf, calib = _load_forecaster_30m()
    if clf is None and calib is None:
        _ = train_forecaster_30m(feature_cols=list(use_cols))
        clf, calib = _load_forecaster_30m()
    model = calib if calib is not None else clf
    p = model.predict_proba(X_tail)[:,1]
    out = df.tail(n_tail).copy(); out["risk_forecast_30m"] = p
    out = out[["ts","pool","anom_fused","risk_forecast_30m"]]
    if write_parquet:
        try:
            out.to_parquet(FORECAST_30M_PARQUET, index=False); print(f"[forecast30m] wrote {FORECAST_30M_PARQUET}")
        except Exception as e:
            print(f"[warn] parquet write failed: {e}")
    return out.tail(10)

In [197]:
def build_analyst_note_v2() -> dict:
    """
    Build Analyst Note v2 including:
      - risk_now, 10m & 30m forecast
      - top contributors
      - propagation cue with (win, max_lag) for audit
    """
    import json
    tail = load_live().sort_values("ts").groupby("pool").tail(1)
    if tail.empty:
        return {"ok": False, "reason": "no data"}
    top3 = []
    try:
        if EXPLAIN_JSON.exists():
            explain = json.loads(EXPLAIN_JSON.read_text())
        else:
            explain = explain_forecast_10m()
        top3 = (explain or {}).get("top_contributors", [])[:3]
    except Exception:
        pass
    cue = None
    try:
        net = compute_network_features()  
        if isinstance(net, pd.DataFrame) and not net.empty:
            nrow = net.sort_values("neighbor_avg_anom", ascending=False).iloc[0]
            lead_str = "leads" if nrow["lead_lag_best"] > 0 else ("lags" if nrow["lead_lag_best"] < 0 else "co-moves")
            win = int(globals().get("_LAST_NET_WIN", 60))
            ml  = int(globals().get("_LAST_MAX_LAG", 10))
            cue = f"{nrow['pool']} {lead_str} peers (corr={float(nrow['corr_best']):.2f}; win={win}, max_lag={ml})."
    except Exception:
        pass
    risk_now = 0.0; p10 = 0.0; p30 = 0.0
    try:
        n_tail = max(1, len(tail))
        scored10 = score_latest_10m(n_tail=n_tail, write_parquet=False)
        if isinstance(scored10, pd.DataFrame) and not scored10.empty:
            rmax10 = scored10.sort_values("risk_forecast_10m", ascending=False).iloc[0]
            p10 = float(rmax10.get("risk_forecast_10m", 0.0))
            risk_now = float(rmax10.get("anom_fused", 0.0))
    except Exception:
        pass
    if not np.isfinite(risk_now) or risk_now == 0.0:
        try:
            risk_now = float(tail.get("anom_fused", pd.Series([0.0])).iloc[-1])
        except Exception:
            risk_now = 0.0
    try:
        n_tail = max(1, len(tail))
        scored30 = score_latest_30m(n_tail=n_tail, write_parquet=False)
        if isinstance(scored30, pd.DataFrame) and not scored30.empty:
            rmax30 = scored30.sort_values("risk_forecast_30m", ascending=False).iloc[0]
            p30 = float(rmax30.get("risk_forecast_30m", 0.0))
    except Exception:
        pass
    band = ("High" if (p10 >= 0.60 or p30 >= 0.60 or risk_now >= 0.90)
            else ("Medium" if (p10 >= 0.35 or p30 >= 0.50 or risk_now >= 0.70) else "Low"))
    try:
        freshness = "Fresh" if bool(tail["feeds_fresh"].iloc[-1]) else "Stale"
    except Exception:
        freshness = "Unknown"
    note_lines = [f"Fused anomaly now={risk_now:.2f}; 10-min risk={p10:.2f}; 30-min risk={p30:.2f}. Confidence {band}."]
    if top3:
        note_lines.append("Top drivers: " + "; ".join(top3))
    if cue:
        note_lines.append("Propagation: " + cue)
    note_lines.append("Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).")
    note = " ".join(note_lines)[:900]
    try:
        pdf_path = export_analyst_note_pdf(
            note_text=note,
            risk_now=risk_now,
            risk_10m=p10,
            risk_30m=p30,
            contributors=top3,
            freshness=freshness,
            confidence=band,
            out_path=OUT / "analyst_note.pdf",
            title="Analyst Note v2"
        )
    except Exception:
        pdf_path = OUT / "analyst_note.txt"
        try:
            pdf_path.write_text(note)
        except Exception:
            pass
    return {"ok": True, "note": note, "pdf": str(pdf_path)}

In [198]:
import json, math, hashlib, time
from pathlib import Path
from datetime import datetime, timezone, timedelta
import numpy as np
import pandas as pd
try:
    from sklearn.isotonic import IsotonicRegression
    from sklearn.linear_model import LogisticRegression
    _SK_SEV_OK = True
except Exception:
    _SK_SEV_OK = False
SEV_CALIB_PATH = OUT_MODEL / "severity_calibrator.joblib"
SEV_CALIB_META = OUT_MODEL / "severity_calibrator_meta.json"
try:
    from joblib import dump, load
    _JOBLIB_OK = True
except Exception:
    _JOBLIB_OK = False

In [199]:
def _sev_predict_proba(texts: list[str]) -> np.ndarray:
    """
    Return uncalibrated severity probability in [0,1] for each text.
    Uses your global SEV_MODEL if available; falls back to 0/1 on non-empty.
    """
    if not texts:
        return np.zeros(0, dtype=float)
    try:
        if hasattr(SEV_MODEL, "predict_proba"):
            p = SEV_MODEL.predict_proba(texts)
            if isinstance(p, (list, np.ndarray)):
                p = np.asarray(p, dtype=float)
                if p.ndim == 2 and p.shape[1] == 2:
                    p = p[:, 1]
                return np.clip(p, 0, 1)
    except Exception:
        pass
    try:
        if hasattr(SEV_MODEL, "model") and hasattr(SEV_MODEL.model, "predict_proba"):
            p = SEV_MODEL.model.predict_proba(SEV_MODEL._embed(texts))[:, 1]
            return np.clip(p, 0, 1)
    except Exception:
        pass
    prior = 0.5
    try:
        if hasattr(SEV_MODEL, "model") and isinstance(SEV_MODEL.model, dict) and "prior" in SEV_MODEL.model:
            prior = float(SEV_MODEL.model["prior"])
    except Exception:
        pass
    return np.array([prior if (t is None or str(t).strip() == "") else 1.0 for t in texts], dtype=float)

In [200]:
class _SeverityCalibrator:
    """
    Calibrates a 1D probability p_raw -> p_cal using:
      - IsotonicRegression (default), or
      - Logistic (Platt) sigmoid on the raw p.
    """
    def __init__(self, method: str = "isotonic"):
        self.method = "isotonic" if method not in ("sigmoid", "isotonic") else method
        self.model = None
    def fit(self, p_raw: np.ndarray, y_sev_1to5: np.ndarray):
        if not _SK_SEV_OK:
            self.model = None
            return self
        p = np.clip(np.asarray(p_raw, dtype=float), 0, 1)
        y = np.asarray(y_sev_1to5, dtype=float)
        y_bin = (y >= 3).astype(int)
        if len(np.unique(y_bin)) < 2:
            self.model = ("identity", None)
            return self
        if self.method == "isotonic":
            iso = IsotonicRegression(out_of_bounds="clip")
            iso.fit(p, y_bin)
            self.model = ("isotonic", iso)
        else:
            lr = LogisticRegression(solver="lbfgs")
            lr.fit(p.reshape(-1, 1), y_bin)
            self.model = ("sigmoid", lr)
        return self
    def predict_proba(self, p_raw: np.ndarray) -> np.ndarray:
        p = np.clip(np.asarray(p_raw, dtype=float), 0, 1)
        if self.model is None:
            return p
        kind, m = self.model
        if kind == "identity" or m is None:
            return p
        if kind == "isotonic":
            return np.clip(m.predict(p), 0, 1)
        return np.clip(m.predict_proba(p.reshape(-1, 1))[:, 1], 0, 1)

In [201]:
def train_severity_calibrator(method: str = "isotonic"):
    """
    Train a probability calibrator for severity using existing EVENTS_JSON.
    Maps text -> p_raw (from SEV_MODEL) -> p_cal (calibrated).
    Saves calibrator to disk.
    """
    if not EVENTS_JSON.exists():
        raise FileNotFoundError(f"No EVENTS_JSON at {EVENTS_JSON}")
    events = json.loads(EVENTS_JSON.read_text())
    if not isinstance(events, list) or not events:
        raise ValueError("No events to calibrate on.")
    texts = [str(e.get("summary", "")) for e in events]
    y_1to5 = np.array([int(e.get("severity", 1)) for e in events], dtype=int)
    p_raw = _sev_predict_proba(texts)
    calib = _SeverityCalibrator(method=method).fit(p_raw, y_1to5)
    if _JOBLIB_OK:
        dump(calib, SEV_CALIB_PATH)
    meta = {"ts": datetime.now(timezone.utc).isoformat(timespec="seconds"),
            "method": calib.method, "n": int(len(texts))}
    SEV_CALIB_META.write_text(json.dumps(meta, indent=2))
    return calib

In [202]:
def _load_sev_calibrator():
    if _JOBLIB_OK and SEV_CALIB_PATH.exists():
        try:
            return load(SEV_CALIB_PATH)
        except Exception:
            return None
    return None
def severity_scores_1to5(texts: list[str]) -> list[int]:
    """
    Calibrated severity 1..5 from text inputs.
    """
    p_raw = _sev_predict_proba(texts)
    calib = _load_sev_calibrator()
    p_cal = calib.predict_proba(p_raw) if calib else p_raw
    sev = np.clip((np.floor(p_cal * 5) + 1).astype(int), 1, 5)
    return sev.tolist()
def enrich_events_with_calibrated_severity():
    if not EVENTS_JSON.exists():
        return
    ev = json.loads(EVENTS_JSON.read_text())
    if not isinstance(ev, list) or not ev:
        return
    texts = [str(e.get("summary", "")) for e in ev]
    sev = severity_scores_1to5(texts)
    for i, s in enumerate(sev):
        ev[i]["severity"] = int(s)
    EVENTS_JSON.write_text(json.dumps(ev, indent=2))
    print("[severity] events updated with calibrated severities (1..5)")

In [203]:
EMB_CACHE = OUT / "embeddings.npz"
try:
    from sentence_transformers import SentenceTransformer
    _SBERT_OK = True
except Exception:
    _SBERT_OK = False

In [204]:
class RecencyRAG:
    """
    Maintains an embedding cache (NPZ) with fields:
      ids: list[str], texts: list[str], sources: list[str], ts: list[int], vecs: np.ndarray
    Search uses cosine similarity * recency decay, restricted to last N days.
    """
    def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        self.model_name = model_name
        self.model = SentenceTransformer(model_name) if _SBERT_OK else None
        self.ids, self.texts, self.sources, self.ts, self.vecs = [], [], [], [], None
        self._load()
    def _load(self):
        if EMB_CACHE.exists():
            z = np.load(EMB_CACHE, allow_pickle=True)
            self.ids     = list(z["ids"])
            self.texts   = list(z["texts"])
            self.sources = list(z["sources"])
            self.ts      = list(z["ts"])
            self.vecs    = z["vecs"]
        else:
            self.vecs = np.zeros((0, 384), dtype=np.float32)  
    def _save(self):
        np.savez_compressed(
            EMB_CACHE,
            ids=np.array(self.ids, dtype=object),
            texts=np.array(self.texts, dtype=object),
            sources=np.array(self.sources, dtype=object),
            ts=np.array(self.ts, dtype=np.int64),
            vecs=self.vecs.astype(np.float32),
        )
    def _embed(self, texts: list[str]) -> np.ndarray:
        if self.model is not None:
            v = self.model.encode(texts, normalize_embeddings=True)
            return v.astype(np.float32)
        rng = np.random.default_rng(42)
        proj = rng.normal(size=(512, 384)).astype(np.float32)
        X = np.zeros((len(texts), 512), dtype=np.float32)
        for i, t in enumerate(texts):
            h = hashlib.sha1(str(t).encode()).digest()
            idx = np.frombuffer(h, dtype=np.uint8) % 512
            X[i, idx] = 1.0
        v = X @ proj
        n = np.linalg.norm(v, axis=1, keepdims=True) + 1e-9
        return (v / n).astype(np.float32)
    def upsert(self, docs: list[dict]):
        """
        docs: [{id, text, source, ts(int seconds)}]
        Only (re)embeds new/changed docs; caches to disk.
        """
        if not docs:
            return
        id_to_pos = {d: i for i, d in enumerate(self.ids)}
        batch_texts, batch_pos = [], []
        now = int(datetime.now(timezone.utc).timestamp())
        for d in docs:
            _id   = str(d.get("id") or d.get("source") or d.get("title") or f"doc-{now}")
            text  = str(d.get("text", ""))[:2000]
            src   = str(d.get("source", ""))
            ts    = int(d.get("ts", now))
            pos = id_to_pos.get(_id, -1)
            if pos < 0:
                self.ids.append(_id); self.texts.append(text); self.sources.append(src); self.ts.append(ts)
                batch_texts.append(text); batch_pos.append(len(self.ids) - 1)
            elif self.texts[pos] != text:
                self.texts[pos] = text; self.sources[pos] = src; self.ts[pos] = ts
                batch_texts.append(text); batch_pos.append(pos)
        if batch_texts:
            new_vecs = self._embed(batch_texts)
            if self.vecs.shape[0] < len(self.ids):
                extra = np.zeros((len(self.ids) - self.vecs.shape[0], new_vecs.shape[1]), dtype=np.float32)
                self.vecs = np.vstack([self.vecs, extra])
            for v, pos in zip(new_vecs, batch_pos):
                self.vecs[pos] = v
            self._save()
    def search(self, query: str, k: int = 5, window_days: int = 14, half_life_days: int = 7):
        """
        Cosine similarity * exp(-age/half_life) over docs from last `window_days`.
        """
        if not self.ids:
            return []
        qv = self._embed([query])[0] 
        ages = np.array([max(0, int(datetime.now(timezone.utc).timestamp()) - int(t)) for t in self.ts])
        in_window = ages <= (window_days * 86400)
        if not in_window.any():
            return []
        V = self.vecs[in_window]
        srcs = np.array(self.sources, dtype=object)[in_window]
        texts = np.array(self.texts, dtype=object)[in_window]
        ts    = np.array(self.ts, dtype=np.int64)[in_window]
        sims = (V @ qv)  
        decay = np.exp(- (ages[in_window] / (half_life_days * 86400.0)))
        score = sims * decay
        idx = np.argsort(-score)[:k]
        out = []
        for i in idx:
            out.append({"source": str(srcs[i]), "text": str(texts[i]), "ts": int(ts[i]), "score": float(score[i])})
        return out

In [205]:
try:
    RAG_B = RecencyRAG()  
except Exception:
    RAG_B = None

In [206]:
def _recent_event_citations(hours: int = 48, top_n: int = 3) -> list[str]:
    if not EVENTS_JSON.exists():
        return []
    try:
        ev = json.loads(EVENTS_JSON.read_text())
    except Exception:
        return []
    if not isinstance(ev, list) or not ev:
        return []
    now = datetime.now(timezone.utc).timestamp()
    srcs = []
    for e in sorted(ev, key=lambda z: int(z.get("severity", 0)), reverse=True):
        ts = e.get("ts")
        try:
            t = pd.to_datetime(ts, utc=True).timestamp()
        except Exception:
            t = now
        if (now - t) <= hours * 3600:
            s = str(e.get("source", "")).strip()
            if s and (s not in srcs):
                srcs.append(s)
        if len(srcs) >= top_n:
            break
    return srcs

In [207]:
def build_analyst_note_v2():
    """
    (Patched) Adds source citations line when events contributed.
    Keeps win/max_lag in propagation cue (from your earlier patch).
    """
    import json
    tail = load_live().sort_values("ts").groupby("pool").tail(1)
    if tail.empty:
        return {"ok": False, "reason": "no data"}
    top3 = []
    try:
        if EXPLAIN_JSON.exists():
            explain = json.loads(EXPLAIN_JSON.read_text())
        else:
            explain = explain_forecast_10m()
        top3 = (explain or {}).get("top_contributors", [])[:3]
    except Exception:
        pass
    cue = None
    try:
        net = compute_network_features()
        if isinstance(net, pd.DataFrame) and not net.empty:
            nrow = net.sort_values("neighbor_avg_anom", ascending=False).iloc[0]
            lead_str = "leads" if nrow["lead_lag_best"] > 0 else ("lags" if nrow["lead_lag_best"] < 0 else "co-moves")
            win = int(globals().get("_LAST_NET_WIN", 60))
            ml  = int(globals().get("_LAST_MAX_LAG", 10))
            cue = f"{nrow['pool']} {lead_str} peers (corr={float(nrow['corr_best']):.2f}; win={win}, max_lag={ml})."
    except Exception:
        pass
    risk_now = 0.0; p10 = 0.0; p30 = 0.0
    try:
        scored10 = score_latest_10m(n_tail=max(1, len(tail)), write_parquet=False)
        if isinstance(scored10, pd.DataFrame) and not scored10.empty:
            rmax10 = scored10.sort_values("risk_forecast_10m", ascending=False).iloc[0]
            p10 = float(rmax10.get("risk_forecast_10m", 0.0))
            risk_now = float(rmax10.get("anom_fused", 0.0))
    except Exception:
        pass
    if not np.isfinite(risk_now) or risk_now == 0.0:
        try:
            risk_now = float(tail.get("anom_fused", pd.Series([0.0])).iloc[-1])
        except Exception:
            risk_now = 0.0
    try:
        scored30 = score_latest_30m(n_tail=max(1, len(tail)), write_parquet=False)
        if isinstance(scored30, pd.DataFrame) and not scored30.empty:
            rmax30 = scored30.sort_values("risk_forecast_30m", ascending=False).iloc[0]
            p30 = float(rmax30.get("risk_forecast_30m", 0.0))
    except Exception:
        pass
    band = ("High" if (p10 >= 0.60 or p30 >= 0.60 or risk_now >= 0.90)
            else ("Medium" if (p10 >= 0.35 or p30 >= 0.50 or risk_now >= 0.70) else "Low"))
    try:
        freshness = "Fresh" if bool(tail["feeds_fresh"].iloc[-1]) else "Stale"
    except Exception:
        freshness = "Unknown"

    note_lines = [
        f"Fused anomaly now={risk_now:.2f}; 10-min risk={p10:.2f}; 30-min risk={p30:.2f}. Confidence {band}."
    ]
    if top3:
        note_lines.append("Top drivers: " + "; ".join(top3))
    if cue:
        note_lines.append("Propagation: " + cue)
    cites = _recent_event_citations(hours=48, top_n=3)
    if cites:
        note_lines.append("Sources: " + " | ".join(cites))
    note_lines.append("Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).")
    note = " ".join(note_lines)[:900]
    try:
        pdf_path = export_analyst_note_pdf(
            note_text=note,
            risk_now=risk_now,
            risk_10m=p10,
            risk_30m=p30,
            contributors=top3,
            freshness=freshness,
            confidence=band,
            out_path=OUT / "analyst_note.pdf",
            title="Analyst Note v2"
        )
    except Exception:
        pdf_path = OUT / "analyst_note.txt"
        try: pdf_path.write_text(note)
        except Exception: pass
    return {"ok": True, "note": note, "pdf": str(pdf_path), "citations": cites}

In [208]:
ALERTS_STATE_JSON = OUT / "alerts_state.json"
_LAST_ALERT = {"hash": "", "ts": 0.0, "acked": False}
def _load_alert_state():
    if ALERTS_STATE_JSON.exists():
        try:
            s = json.loads(ALERTS_STATE_JSON.read_text())
            _LAST_ALERT.update(s)
        except Exception:
            pass
def _save_alert_state():
    ALERTS_STATE_JSON.write_text(json.dumps(_LAST_ALERT, indent=2))
_load_alert_state()
def _mk_alert_hash(payload: dict) -> str:
    blob = json.dumps(payload, sort_keys=True)
    return hashlib.sha1(blob.encode()).hexdigest()

In [209]:
def write_incident_snapshot(level: str, note: dict | None = None, citations: list[str] | None = None, extras: dict | None = None) -> Path:
    """
    Writes outputs/incident_{ts}.md with state blob, features, network cue, citations, policy level.
    """
    ts = datetime.now(timezone.utc).isoformat(timespec="seconds")
    cites = citations or []
    net = {}
    try:
        netdf = compute_network_features()
        if isinstance(netdf, pd.DataFrame) and not netdf.empty:
            nrow = netdf.sort_values("neighbor_avg_anom", ascending=False).iloc[0].to_dict()
            net = {k: (float(v) if isinstance(v, (int, float, np.floating)) else v) for k, v in nrow.items()}
            net["win"] = int(globals().get("_LAST_NET_WIN", 60))
            net["max_lag"] = int(globals().get("_LAST_MAX_LAG", 10))
    except Exception:
        pass
    top = {}
    try:
        if EXPLAIN_JSON.exists():
            top = json.loads(EXPLAIN_JSON.read_text())
    except Exception:
        pass
    body = []
    body.append(f"# Incident Snapshot — {level.upper()}")
    body.append(f"Generated: {ts}\n")
    if note and note.get("note"):
        body.append("## Analyst Note")
        body.append(note["note"] + "\n")
    body.append("## Top Contributors")
    body.append("```json\n" + json.dumps(top, indent=2) + "\n```")
    body.append("## Network Cue")
    body.append("```json\n" + json.dumps(net, indent=2) + "\n```")
    if cites:
        body.append("## Citations\n" + "\n".join(f"- {c}" for c in cites))
    if extras:
        body.append("## State\n```json\n" + json.dumps(extras, indent=2) + "\n```")
    md = "\n".join(body)
    path = OUT / f"incident_{ts.replace(':','').replace('-','').replace('T','_')}.md"
    path.write_text(md)
    return path

In [210]:
def trigger_alerts_if_needed(webhook_url: str, n_tail: int = 80, min_rows_for_incidents: int = 60, cooldown_s: int = 600) -> bool:
    """Post 'red' incidents with requires_ack branch; persist cooldown/ack state."""
    if not webhook_url:
        print("[alert] no webhook configured")
        return False
    try:
        import requests
    except Exception as e:
        print("[alert] requests not available:", e)
        return False
    try:
        dec = decide_latest(n_tail=n_tail, min_rows_for_incidents=min_rows_for_incidents)
    except Exception as e:
        print("[alert] decide_latest failed:", e)
        return False
    reds = dec[dec["level"] == "red"] if isinstance(dec, pd.DataFrame) else pd.DataFrame()
    oranges = dec[dec["level"] == "orange"] if isinstance(dec, pd.DataFrame) else pd.DataFrame()
    events = reds if not reds.empty else oranges
    if events.empty:
        print("[alert] no orange/red alerts")
        return True
    payload = events.to_dict(orient="records")
    envelope = {"alerts": payload, "ts": datetime.now(timezone.utc).isoformat(timespec="seconds")}
    h = _mk_alert_hash(envelope)
    now = time.time()
    if _LAST_ALERT.get("hash") == h and (now - float(_LAST_ALERT.get("ts", 0))) < cooldown_s:
        print("[alert] suppressed (duplicate/cooldown)")
        return True
    _LAST_ALERT.update({"hash": h, "ts": now, "acked": False})
    _save_alert_state()
    level = "red" if not reds.empty else "orange"
    requires_ack = (level == "red")
    ack_url = f"/ack/{h}" 
    note = build_analyst_note_v2()
    cites = note.get("citations", [])
    snap = write_incident_snapshot(level=level, note=note, citations=cites,
                                   extras={"hash": h, "requires_ack": requires_ack})
    post = {
        "level": level,
        "requires_ack": requires_ack,
        "ack_hash": h,
        "ack_url": ack_url,
        "cooldown_s": cooldown_s,
        "snapshot_md": str(snap),
        "alerts": payload,
        "note": note.get("note", ""),
        "citations": cites,
    }
    try:
        r = requests.post(webhook_url, json=post, timeout=8)
        print(f"[alert] posted {len(payload)} {level} alerts → {r.status_code}")
        ok = (200 <= r.status_code < 300)
        if ok:
            _save_alert_state()
        return ok
    except Exception as e:
        print("[alert] post failed:", e)
        return False

In [211]:
try:
    from fastapi import FastAPI
except Exception:
    FastAPI = None
if FastAPI is not None:
    try:
        app  
    except NameError:
        app = FastAPI(title="Depeg Sentinel MCP", version="2.1.0")
    @app.post("/ack/{h}")
    def ack_alert(h: str):
        """
        Mark the latest pending alert as acknowledged.
        Your Slack button (or CLI) can hit this endpoint before auto-mitigation.
        """
        _load_alert_state()
        if _LAST_ALERT.get("hash") == h:
            _LAST_ALERT["acked"] = True
            _save_alert_state()
            return {"ok": True, "acknowledged": True, "hash": h}
        return {"ok": False, "acknowledged": False, "reason": "hash mismatch or no pending alert"}

In [212]:
train_severity_calibrator(method="isotonic")  
enrich_events_with_calibrated_severity()
if RAG_B:
    RAG_B.upsert([{"id":"curve-123", "text":"post text...", "source":"https://gov.curve.fi/t/...", "ts": int(time.time())}])
trigger_alerts_if_needed(WEBHOOK, n_tail=80, min_rows_for_incidents=60)

[severity] events updated with calibrated severities (1..5)
[alert] no webhook configured


False

In [213]:
from __future__ import annotations
import os, io, sys, json, time, hmac, hashlib, shutil, subprocess, zipfile
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Optional, List, Dict, Any, Sequence
from fastapi import APIRouter, FastAPI, Depends, HTTPException, Header, Request, Response
from pydantic import BaseModel, Field

In [214]:
OUT        = Path(globals().get("OUT", Path("outputs")))
OUT_MODEL  = Path(globals().get("OUT_MODEL", OUT / "model"))
OUT_ARCH   = Path(globals().get("OUT_ARCH", OUT / "archive"))
OUT_LOGS   = Path(globals().get("OUT_LOGS", OUT / "logs"))
LIVE_CSV   = Path(globals().get("LIVE_CSV", OUT / "live_dataset.csv"))
FORECAST_10M_PARQUET = Path(globals().get("FORECAST_10M_PARQUET", OUT / "forecast_10m.parquet"))
FORECAST_30M_PARQUET = Path(globals().get("FORECAST_30M_PARQUET", OUT / "forecast_30m.parquet"))
DET_AP_JSON = Path(globals().get("DET_AP_JSON", OUT / "artifacts" / "detector_pr_auc.json"))
CAL_10M_JSON = Path(globals().get("CAL_10M_JSON", OUT / "artifacts" / "calibration_10m.json"))
CAL_30M_JSON = Path(globals().get("CAL_30M_JSON", OUT / "artifacts" / "calibration_30m.json"))
ALERTS_STATE_JSON = Path(globals().get("ALERTS_STATE_JSON", OUT / "alerts_state.json"))
for p in (OUT, OUT_MODEL, OUT_ARCH, OUT_LOGS, OUT / "artifacts"):
    p.mkdir(parents=True, exist_ok=True)

In [215]:
API_KEY     = os.getenv("API_KEY", "").strip()         
HMAC_SECRET = os.getenv("HMAC_SECRET", "").strip()     
MAX_SKEW_S  = int(os.getenv("HMAC_MAX_SKEW_S", "300")) 
class AuthError(HTTPException):
    def __init__(self, detail="Unauthorized"):
        super().__init__(status_code=401, detail=detail)
async def _auth_guard(
    request: Request,
    x_api_key: Optional[str] = Header(default=None),
    x_timestamp: Optional[str] = Header(default=None),
    x_signature: Optional[str] = Header(default=None),
):
    if API_KEY:
        if not x_api_key or x_api_key != API_KEY:
            raise AuthError("Invalid API key")
    if HMAC_SECRET:
        if not x_timestamp or not x_signature:
            raise AuthError("Missing HMAC headers")
        try:
            ts = int(x_timestamp)
        except Exception:
            raise AuthError("Bad timestamp")
        if abs(time.time() - ts) > MAX_SKEW_S:
            raise AuthError("Timestamp out of range")
        body = await request.body()
        msg  = f"{ts}.{body.decode('utf-8', errors='ignore')}"
        dig  = hmac.new(HMAC_SECRET.encode(), msg.encode(), hashlib.sha256).hexdigest()
        if not hmac.compare_digest(dig, x_signature):
            raise AuthError("Bad signature")

In [216]:
class ScoreZooItem(BaseModel):
    ts: str
    pool: str
    z_if: float | None = None
    z_lof: float | None = None
    z_ocsvm: float | None = None
    z_cusum: float | None = None
    z_ae: float | None = None
    z_ae_seq: float | None = None
    anom_fused: float | None = None

In [217]:
class ScoreZooResponse(BaseModel):
    items: List[ScoreZooItem]
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "items": [{
                    "ts": "2025-08-27T10:22:05+00:00", "pool": "USDC/USDT-uni",
                    "z_if": 0.12, "z_lof": 0.08, "z_ocsvm": 0.05, "z_cusum": 0.10,
                    "z_ae": 0.04, "z_ae_seq": 0.18, "anom_fused": 0.18
                }]
            }]
        }
    }

In [218]:
class ForecastItem(BaseModel):
    ts: str
    pool: str
    anom_fused: float | None = None
    risk_forecast_10m: float | None = None
    risk_forecast_30m: float | None = None

In [219]:
class ForecastResponse(BaseModel):
    items: List[ForecastItem]
    model_config = {
        "json_schema_extra": {
            "examples": [{
                "items": [{
                    "ts": "2025-08-27T10:22:05+00:00", "pool": "DAI/USDC-curve",
                    "anom_fused": 0.31, "risk_forecast_10m": 0.44, "risk_forecast_30m": 0.57
                }]
            }]
        }
    }
class ExplainResponse(BaseModel):
    top_contributors: List[str] = []
    model_config = {"json_schema_extra": {"examples": [{"top_contributors": ["dev_roll_std↑", "spot_twap_gap_bps↑", "event_severity_max_24h=5"]}]}}
class EventItem(BaseModel):
    ts: str
    summary: str
    severity: int = Field(ge=1, le=5)
    source: str | None = None
class TopEventsResponse(BaseModel):
    items: List[EventItem]
class NetworkSignal(BaseModel):
    ts: str
    pool: str
    neighbor_max_dev: float
    neighbor_avg_anom: float
    lead_lag_best: int
    corr_best: float
class NetworkSignalsResponse(BaseModel):
    items: List[NetworkSignal]
class PolicyState(BaseModel):
    feeds_fresh: bool | None = None
    recent_forecasts: Dict[str, float] | None = None
class PolicyDecision(BaseModel):
    level: str
    actions: List[Dict[str, str]]
    rationale: str
    requires_ack: bool = False
    model_config = {
        "json_schema_extra": {"examples": [{
            "level": "orange",
            "actions": [{"title": "Prepare mitigation", "rationale": "p10=0.62, fused=0.91"}],
            "rationale": "Elevated spread and TVL outflow; watch 5m",
            "requires_ack": False
        }]}
    }
class RetrainCheckResponse(BaseModel):
    should_retrain: bool
    reason: str
    drift: Dict[str, Any] | None = None
class SnapshotResponse(BaseModel):
    note: Dict[str, Any]
    report: Dict[str, Any] | None = None
class Healthz(BaseModel):
    ok: bool
    run_id: str
class Readyz(BaseModel):
    ok: bool
    rpc_ok: bool
    csv_write_ok: bool
    models_ok: bool
    detail: Dict[str, Any] = {}

In [220]:
RUN_ID = os.getenv("RUN_ID") or datetime.now(timezone.utc).strftime("run-%Y%m%dT%H%M%S")
LOG_PATH = OUT_LOGS / f"{RUN_ID}.jsonl"
def log_event(kind: str, payload: Dict[str, Any]):
    rec = {"ts": datetime.now(timezone.utc).isoformat(timespec="seconds"), "run_id": RUN_ID, "kind": kind, **payload}
    with LOG_PATH.open("a", encoding="utf-8") as f:
        f.write(json.dumps(rec, ensure_ascii=False) + "\n")

In [221]:
def _git_hash() -> str:
    try:
        return subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL).decode().strip()
    except Exception:
        return "nogit"
def _time_split_idx(ts_series, frac: float = 0.70):
    import numpy as np, pandas as pd
    ts = pd.to_datetime(ts_series, utc=True, errors="coerce")
    cut = ts.quantile(frac)
    idx_tr = (ts <= cut).to_numpy().nonzero()[0]
    idx_te = (ts >  cut).to_numpy().nonzero()[0]
    return idx_tr, idx_te

In [222]:
def write_model_registry(
    name: str,
    feature_list: Sequence[str],
    labels: Dict[str, Any],
    metrics: Dict[str, float] | None = None,
    keep_last: int = 5,
) -> Path:
    OUT_MODEL.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
    payload = {
        "ts": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "model_name": name,
        "features": list(feature_list),
        "labels": labels,
        "metrics": metrics or {},
        "git_hash": _git_hash(),
        "artifacts": {
            "forecast_10m": str(FORECAST_10M_PARQUET) if FORECAST_10M_PARQUET.exists() else None,
            "forecast_30m": str(FORECAST_30M_PARQUET) if FORECAST_30M_PARQUET.exists() else None,
            "detector_pr_auc": str(DET_AP_JSON) if DET_AP_JSON.exists() else None,
            "cal_10m": str(CAL_10M_JSON) if CAL_10M_JSON.exists() else None,
            "cal_30m": str(CAL_30M_JSON) if CAL_30M_JSON.exists() else None,
        },
    }
    path = OUT_MODEL / f"version_{name}_{stamp}.json"
    path.write_text(json.dumps(payload, indent=2))
    (OUT_MODEL / "version.json").write_text(json.dumps(payload, indent=2))
    files = sorted(OUT_MODEL.glob(f"version_{name}_*.json"))
    if len(files) > keep_last:
        for old in files[:-keep_last]:
            try: old.unlink()
            except Exception: pass
    log_event("model_registry", {"name": name, "path": str(path), "metrics": metrics or {}})
    return path

In [223]:
def _maybe_register_after_train_10m(feature_cols: Sequence[str], ap: float | None, brier: float | None):
    labels = {
        "y_10m": getattr(globals().get("LABEL_DEF_10M", {}), "dict", lambda: {} )() or {
            "dev_thr": 0.005, "fused_thr": 0.90, "horizon": 10
        }
    }
    metrics = {"AP": float(ap) if ap is not None else None, "Brier": float(brier) if brier is not None else None}
    write_model_registry("forecast_10m", feature_cols, labels, metrics)
def _maybe_register_after_train_30m(feature_cols: Sequence[str], ap: float | None, brier: float | None):
    labels = {
        "y_30m": getattr(globals().get("LABEL_DEF_30M", {}), "dict", lambda: {} )() or {
            "dev_thr": 0.005, "fused_thr": 0.90, "horizon": 30
        }
    }
    metrics = {"AP": float(ap) if ap is not None else None, "Brier": float(brier) if brier is not None else None}
    write_model_registry("forecast_30m", feature_cols, labels, metrics)

In [224]:
def daily_zip_archives(retain_days_zip: int = 30, retain_days_raw: int = 14) -> Dict[str, Any]:
    """
    Zips yesterday's OUT/*.{csv,parquet,json} into OUT/archive/YYYYMMDD.zip and
    prunes zips older than retain_days_zip and raw files older than retain_days_raw.
    """
    now = datetime.now(timezone.utc)
    yday = (now - timedelta(days=1)).strftime("%Y%m%d")
    zip_path = OUT_ARCH / f"{yday}.zip"
    def _iter_files():
        for p in OUT.rglob("*"):
            if p.is_file() and p.suffix.lower() in {".csv", ".parquet", ".json"}:
                try:
                    mtime = datetime.utcfromtimestamp(p.stat().st_mtime)
                except Exception:
                    continue
                if mtime.strftime("%Y%m%d") == yday:
                    yield p
    created = False
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for f in _iter_files():
            arcname = f.relative_to(OUT).as_posix()
            zf.write(f, arcname)
            created = True
    cutoff_zip = now - timedelta(days=retain_days_zip)
    for z in OUT_ARCH.glob("*.zip"):
        try:
            ts = datetime.strptime(z.stem, "%Y%m%d")
            if ts < cutoff_zip:
                z.unlink()
        except Exception:
            pass
    cutoff_raw = now - timedelta(days=retain_days_raw)
    pruned = 0
    for p in OUT.rglob("*"):
        if p.is_file() and p.suffix.lower() in {".csv", ".parquet", ".json"} and OUT_ARCH not in p.parents:
            try:
                mtime = datetime.utcfromtimestamp(p.stat().st_mtime)
                if mtime < cutoff_raw:
                    p.unlink(); pruned += 1
            except Exception:
                pass
    result = {"zip": str(zip_path), "created": created, "pruned_raw": pruned}
    log_event("archive_daily", result)
    return result

In [225]:
def _rpc_ready() -> bool:
    try:
        w3 = getattr(globals().get("onchain", None), "w3", None)
        if not w3: return True  
        _ = w3.eth.block_number
        return True
    except Exception:
        return False
def _csv_write_ready() -> bool:
    try:
        tmp = OUT / ".readyz.tmp"
        tmp.write_text("ok")
        tmp.unlink(missing_ok=True)
        return True
    except Exception:
        return False
def _models_ready() -> bool:
    paths = [
        OUT_MODEL / "forecast_10m_xgb.joblib",
        OUT_MODEL / "forecast_10m_calib.joblib",
        OUT_MODEL / "forecast_30m_xgb.joblib",
        OUT_MODEL / "forecast_30m_calib.joblib",
    ]
    return any(p.exists() for p in paths)

In [226]:
ml_router = APIRouter(prefix="/ml", tags=["MCP3: risk-intel"], dependencies=[Depends(_auth_guard)])
policy_router = APIRouter(prefix="/policy", tags=["MCP4: meta-controller"], dependencies=[Depends(_auth_guard)])
ops_router = APIRouter(prefix="/ops", tags=["Ops"])

In [227]:
@ml_router.get("/score_zoo", response_model=ScoreZooResponse, summary="Parallel anomaly scores", description="IF/LOF/OCSVM/CUSUM/AE scores + fused per pool.")
def api_score_zoo(pools: Optional[List[str]] = None):
    fn = globals().get("run_anomaly_zoo_update_live")
    if not fn: raise HTTPException(500, "zoo function missing")
    df = fn()
    if pools:
        df = df[df["pool"].isin(pools)]
    items = df[["ts","pool","z_if","z_lof","z_ocsvm","z_cusum","z_ae","z_ae_seq","anom_fused"]].tail(200).to_dict("records")
    return {"items": items}

In [228]:
@ml_router.get("/forecast", response_model=ForecastResponse, summary="Risk forecasts", description="Return 10m/30m probabilities for tail rows.")
def api_forecast(pools: Optional[List[str]] = None, horizon: List[int] = [10, 30]):
    items: List[Dict[str, Any]] = []
    df_live = globals().get("load_live", lambda: None)()
    if df_live is None or df_live.empty:
        return {"items": []}
    h10 = 10 in horizon and "score_latest_10m" in globals()
    h30 = 30 in horizon and "score_latest_30m" in globals()
    base = df_live[["ts","pool","anom_fused"]].tail(200).copy()
    if h10:
        f10 = globals()["score_latest_10m"](n_tail=len(base), write_parquet=False)
        base = base.merge(f10[["ts","pool","risk_forecast_10m"]], on=["ts","pool"], how="left")
    if h30:
        f30 = globals()["score_latest_30m"](n_tail=len(base), write_parquet=False)
        base = base.merge(f30[["ts","pool","risk_forecast_30m"]], on=["ts","pool"], how="left")
    if pools:
        base = base[base["pool"].isin(pools)]
    for r in base.to_dict("records"):
        items.append(r)
    return {"items": items}

In [229]:
@ml_router.get("/explain", response_model=ExplainResponse, summary="Explainability (current)", description="Top contributing features/events for current alerts.")
def api_explain():
    if "explain_forecast_10m" in globals():
        obj = globals()["explain_forecast_10m"]()
        return {"top_contributors": obj.get("top_contributors", [])}
    return {"top_contributors": []}
@ml_router.get("/top_events", response_model=TopEventsResponse, summary="High-severity events")
def api_top_events(since: Optional[str] = None):
    fn = globals().get("_load_events", None)
    if not fn:
        return {"items": []}
    ev = fn()
    if since:
        ev = [e for e in ev if str(e.get("ts","")) >= since]
    ev.sort(key=lambda e: int(e.get("severity", 0)), reverse=True)
    out = [{"ts": e.get("ts",""), "summary": e.get("summary",""), "severity": int(e.get("severity",1)), "source": e.get("source","")} for e in ev[:50]]
    return {"items": out}
@ml_router.get("/network", response_model=NetworkSignalsResponse, summary="Cross-pool network signals", description="Correlation/lead-lag/neighbor features.")
def api_network(pools: Optional[List[str]] = None):
    fn = globals().get("compute_network_features", None)
    if not fn:
        return {"items": []}
    df = fn()
    if pools:
        df = df[df["pool"].isin(pools)]
    return {"items": df.to_dict("records")}

In [230]:
@policy_router.post("/decide", response_model=PolicyDecision, summary="Decide policy level/actions", description="Score gating + escalation rules. Requires auth.")
def api_policy_decide(state: Optional[PolicyState] = None):
    fn = globals().get("decide_latest", None)
    if not fn:
        raise HTTPException(500, "policy function missing")
    df = fn(n_tail=20)  
    row = df.tail(1).to_dict("records")[0]
    decision = {
        "level": row.get("level", "yellow"),
        "actions": row.get("actions", [{"title":"Monitor in 5m","rationale":"default"}]),
        "rationale": row.get("rationale", "rule-based"),
        "requires_ack": (row.get("level") == "red"),
    }
    log_event("policy_decision", {"input_state": state.dict() if state else {}, "decision": decision})
    return decision

In [231]:
@policy_router.get("/retrain_check", response_model=RetrainCheckResponse, summary="Retrain gate", description="Feature drift and schedule based retrain signal.")
def api_retrain_check():
    fn = globals().get("retrain_check", None)
    if not fn:
        return {"should_retrain": True, "reason": "unknown (no fn)", "drift": None}
    out = fn()
    log_event("retrain_check", out)
    return out
@policy_router.get("/snapshot", response_model=SnapshotResponse, summary="Incident snapshot", description="One-page JSON + rendered note.")
def api_snapshot():
    note_fn = globals().get("build_analyst_note_v2", None)
    rep_fn  = globals().get("nightly_report", None)
    note = note_fn() if note_fn else {"ok": False, "note": "(missing)"}
    rep  = rep_fn()  if rep_fn  else None
    log_event("snapshot", {"note_ok": bool(note.get("ok", False))})
    return {"note": note, "report": rep}
@ops_router.get("/healthz", response_model=Healthz, include_in_schema=False)
def healthz():
    return {"ok": True, "run_id": RUN_ID}
@ops_router.get("/readyz", response_model=Readyz, include_in_schema=False)
def readyz():
    rpc_ok  = _rpc_ready()
    csv_ok  = _csv_write_ready()
    mdl_ok  = _models_ready()
    ok = rpc_ok and csv_ok
    return {"ok": ok, "rpc_ok": rpc_ok, "csv_write_ok": csv_ok, "models_ok": mdl_ok,
            "detail": {"live_csv": str(LIVE_CSV), "model_dir": str(OUT_MODEL)}}

In [232]:
app = globals().get("app")
if not isinstance(app, FastAPI):
    app = FastAPI(title="Depeg Sentinel MCP", version=globals().get("APP_VERSION", "3.0.0"))
app.include_router(ml_router)
app.include_router(policy_router)
app.include_router(ops_router)

In [233]:
def _patch_train_hooks():
    import inspect
    from functools import wraps
    if "train_forecaster_10m" in globals():
        orig = globals()["train_forecaster_10m"]
        if not getattr(orig, "_wrapped_for_registry", False):
            @wraps(orig)
            def wrapped(*args, **kwargs):
                res = orig(*args, **kwargs)
                ap = None; brier = None
                try:
                    pass
                except Exception:
                    pass
                feat_cols = kwargs.get("feature_cols") or []
                _maybe_register_after_train_10m(feat_cols, ap, brier)
                return res
            wrapped._wrapped_for_registry = True
            globals()["train_forecaster_10m"] = wrapped
    if "train_forecaster_30m" in globals():
        orig = globals()["train_forecaster_30m"]
        if not getattr(orig, "_wrapped_for_registry", False):
            @wraps(orig)
            def wrapped(*args, **kwargs):
                res = orig(*args, **kwargs)
                ap = None; brier = None
                feat_cols = kwargs.get("feature_cols") or []
                _maybe_register_after_train_30m(feat_cols, ap, brier)
                return res
            wrapped._wrapped_for_registry = True
            globals()["train_forecaster_30m"] = wrapped
_patch_train_hooks()

In [234]:
def cron_daily_housekeeping():
    try:
        daily_zip_archives(retain_days_zip=30, retain_days_raw=14)
    except Exception as e:
        log_event("archive_error", {"err": str(e)})

In [235]:
import hmac, hashlib, time, json, requests
API = "https://sentinel.example.com"
API_KEY = "..."
HMAC_SECRET = b"..."

In [236]:
def auth_headers(method, path, body_obj=None):
    ts = str(int(time.time()))
    body = json.dumps(body_obj) if body_obj is not None else ""
    msg = f"{ts}.{method.upper()}.{path}.{body}".encode()
    sig = hmac.new(HMAC_SECRET, msg, hashlib.sha256).hexdigest()
    return {
        "X-API-Key": API_KEY,
        "X-Timestamp": ts,
        "X-Signature": sig,
        "Content-Type": "application/json",
    }

In [237]:
import time, json, hmac, hashlib, requests

In [238]:
BASE = "http://localhost:8000"
API_KEY = "YOUR_KEY"
HMAC_SECRET = b"YOUR_SECRET"

In [239]:
def auth_headers_simple(method, path, body_obj=None):
    ts = str(int(time.time()))
    body = json.dumps(body_obj) if body_obj is not None else ""
    msg  = f"{ts}.{body}".encode()  
    sig  = hmac.new(HMAC_SECRET, msg, hashlib.sha256).hexdigest()
    return {
        "X-API-Key": API_KEY,
        "X-Timestamp": ts,
        "X-Signature": sig,
        "Content-Type": "application/json",
    }

In [240]:
import threading, uvicorn
from fastapi import FastAPI

In [241]:
if 'app' not in globals() or not isinstance(app, FastAPI):
    app = FastAPI(title="Depeg Sentinel MCP (minimal)")
def _run():
    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")
threading.Thread(target=_run, daemon=True).start()

INFO:     Started server process [17392]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:54132 - "GET /healthz HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:54133 - "GET /ml/score_zoo HTTP/1.1" 200 OK


In [242]:
import requests, time

In [243]:
time.sleep(1)  
print(requests.get("http://127.0.0.1:8000/healthz").text)

{"detail":"Not Found"}


In [244]:
import time, json, hmac, hashlib, requests

In [245]:
BASE = "http://127.0.0.1:8000"
API_KEY = "YOUR_KEY"
HMAC_SECRET = b"YOUR_SECRET"

In [246]:
def auth_headers(method, path, body_obj=None):
    ts = str(int(time.time()))
    body = json.dumps(body_obj) if body_obj is not None else ""
    msg  = f"{ts}.{body}".encode()      
    sig  = hmac.new(HMAC_SECRET, msg, hashlib.sha256).hexdigest()
    return {"X-API-Key": API_KEY, "X-Timestamp": ts, "X-Signature": sig, "Content-Type": "application/json"}
r = requests.get(f"{BASE}/ml/score_zoo", headers=auth_headers("GET", "/ml/score_zoo"))
print(r.status_code, r.text)

[zoo] anomaly scores (incl. LSTM AE) updated
200 [{"ts":"2025-08-28T04:03:30+00:00","pool":"USDC/USDT_univ3","z_if":0.14128078343613304,"z_lof":0.03236953434238083,"z_ocsvm":0.152632505976585,"z_cusum":1.0,"z_ae":0.005321890581399202,"z_ae_seq":0.7132954001426697,"anom_fused":1.0},{"ts":"2025-08-28T20:49:13+00:00","pool":"USDC/USDT_univ3","z_if":0.06454996046366299,"z_lof":0.03936779859707906,"z_ocsvm":0.06844311549174299,"z_cusum":1.0,"z_ae":0.002712093060836196,"z_ae_seq":0.7167590260505676,"anom_fused":1.0},{"ts":"2025-08-28T20:50:37+00:00","pool":"USDC/USDT_univ3","z_if":0.09066860462038757,"z_lof":0.1844498835977221,"z_ocsvm":0.08473838677880109,"z_cusum":1.0,"z_ae":0.002930549206212163,"z_ae_seq":0.7169623374938965,"anom_fused":1.0},{"ts":"2025-08-28T21:00:23+00:00","pool":"USDC/USDT_univ3","z_if":0.05785105162450014,"z_lof":0.015703511928616937,"z_ocsvm":0.13273474357616008,"z_cusum":1.0,"z_ae":0.00470382533967495,"z_ae_seq":0.717720091342926,"anom_fused":1.0},{"ts":"2025-08-28T

In [247]:
from __future__ import annotations
import os, json, time, hmac, hashlib, threading
from pathlib import Path
from datetime import datetime, timezone, timedelta
from typing import Any, Dict, List, Sequence, Optional

In [248]:
def _eval_forecaster_metrics(
    horizon: int,
    feature_cols: Sequence[str] | None = None,
    label_col: str | None = None,
) -> Dict[str, float]:
    """
    Re-evaluate the active forecaster on the 30% holdout for AP & Brier.
    Uses current live CSV + current loaded (calibrated) model.
    """
    try:
        import numpy as np
        import pandas as pd
        from sklearn.metrics import average_precision_score, brier_score_loss
    except Exception:
        return {"AP": float("nan"), "Brier": float("nan")}
    df = load_live()
    if df is None or df.empty:
        return {"AP": float("nan"), "Brier": float("nan")}
    if feature_cols is None:
        if " _default_feats_tabular" in globals():  
            feature_cols = globals()["_default_feats_tabular"]()
        else:
            feature_cols = [
                "dev","dev_roll_std","tvl_outflow_rate","spot_twap_gap_bps","oracle_ratio",
                "anom_fused","r0_delta","r1_delta","event_severity_max_24h","event_count_24h",
                "neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best",
            ]
    if label_col is None:
        label_col = "y_10m" if horizon == 10 else "y_30m"
    use_cols = [c for c in feature_cols if c in df.columns]
    if not use_cols or label_col not in df.columns:
        return {"AP": float("nan"), "Brier": float("nan")}
    X = (df[use_cols]
         .replace([float("inf"), float("-inf")], float("nan"))
         .fillna(0.0).to_numpy())
    y = pd.to_numeric(df[label_col], errors="coerce").fillna(0).astype(int).to_numpy()
    ts = pd.to_datetime(df["ts"], utc=True, errors="coerce")
    cut = ts.quantile(0.70)
    te_mask = ts > cut
    if te_mask.sum() < 5 or len(set(y[te_mask])) < 2:
        return {"AP": float("nan"), "Brier": float("nan")}
    if horizon == 10:
        clf, calib = _load_forecaster()
    else:
        clf, calib = _load_forecaster_30m()
    model = calib if calib is not None else clf
    if model is None:
        return {"AP": float("nan"), "Brier": float("nan")}
    try:
        p = model.predict_proba(X[te_mask])[:, 1]
        ap = float(average_precision_score(y[te_mask], p))
        bs = float(brier_score_loss(y[te_mask], p))
        return {"AP": ap, "Brier": bs}
    except Exception:
        return {"AP": float("nan"), "Brier": float("nan")}

In [249]:
def _patch_train_hooks_strict():
    import inspect
    from functools import wraps
    def _feature_cols_from_kwargs(kwargs, default_fn_name="_default_feats_tabular"):
        cols = kwargs.get("feature_cols")
        if cols:
            return list(cols)
        dfc = globals().get(default_fn_name)
        return list(dfc()) if callable(dfc) else []
    if "train_forecaster_10m" in globals():
        orig = globals()["train_forecaster_10m"]
        if not getattr(orig, "_wrapped_for_registry_v2", False):
            @wraps(orig)
            def wrapped(*args, **kwargs):
                res = orig(*args, **kwargs)
                feat_cols = _feature_cols_from_kwargs(kwargs)
                m = _eval_forecaster_metrics(10, feature_cols=feat_cols, label_col="y_10m")
                _maybe_register_after_train_10m(feat_cols, m.get("AP"), m.get("Brier"))
                return res
            wrapped._wrapped_for_registry_v2 = True
            globals()["train_forecaster_10m"] = wrapped
    if "train_forecaster_30m" in globals():
        orig = globals()["train_forecaster_30m"]
        if not getattr(orig, "_wrapped_for_registry_v2", False):
            @wraps(orig)
            def wrapped(*args, **kwargs):
                res = orig(*args, **kwargs)
                feat_cols = _feature_cols_from_kwargs(kwargs)
                m = _eval_forecaster_metrics(30, feature_cols=feat_cols, label_col="y_30m")
                _maybe_register_after_train_30m(feat_cols, m.get("AP"), m.get("Brier"))
                return res
            wrapped._wrapped_for_registry_v2 = True
            globals()["train_forecaster_30m"] = wrapped
_patch_train_hooks_strict()

In [250]:
def export_contract_docs(out_dir: Path | None = None) -> Dict[str, str]:
    """
    Writes:
      - openapi.json (FastAPI spec)
      - contract.md  (handy per-route cheatsheet with auth & cURL)
    """
    out_dir = out_dir or (OUT / "contract")
    out_dir.mkdir(parents=True, exist_ok=True)
    spec_path = out_dir / "openapi.json"
    try:
        spec = app.openapi()
    except Exception:  
        spec = {}
    spec_path.write_text(json.dumps(spec, indent=2))
    lines = []
    lines.append("# Depeg Sentinel MCP — Contract Cheatsheet\n")
    lines.append("Auth headers (send all requests with these):\n")
    lines.append("```\nX-API-Key: <YOUR_KEY>\nX-Timestamp: <unix-seconds>\nX-Signature: HMAC_SHA256( SECRET, f\"{ts}.{body}\" )\nContent-Type: application/json\n```\n")
    def _curl(method: str, path: str, body_obj: Optional[dict] = None):
        body = json.dumps(body_obj) if body_obj is not None else ""
        return "\n".join([
            "```bash",
            "ts=$(date +%s)",
            f"sig=$(python - <<'PY'\nimport hmac,hashlib,os\nsec=os.environ.get('HMAC_SECRET','secret').encode()\nbody={body!r}\nts=os.environ.get('TS_OVERRIDE',str(int(__import__('time').time())))\nmsg=f\"{ '{' }ts{'}' }.{ '{' }body{'}' }\".encode()\nprint(hmac.new(sec,msg,hashlib.sha256).hexdigest())\nPY",
            ")",
            f"curl -sS -X {method} \"$BASE{path}\" \\",
            "  -H \"X-API-Key: $API_KEY\" \\",
            "  -H \"Content-Type: application/json\" \\",
            "  -H \"X-Timestamp: $ts\" \\",
            "  -H \"X-Signature: $sig\" \\",
            (f"  -d '{body}'" if body else ""),
            "```",
        ])
    lines.append("## /ml/score_zoo  `GET`\nReturns parallel anomaly scores.\n")
    lines.append(_curl("GET", "/ml/score_zoo"))
    lines.append("\n## /ml/forecast  `GET`\nReturns 10m/30m risk probabilities for the tail rows.\n")
    lines.append(_curl("GET", "/ml/forecast"))
    lines.append("\n## /ml/explain  `GET`\nTop contributors for the current window.\n")
    lines.append(_curl("GET", "/ml/explain"))
    lines.append("\n## /ml/top_events  `GET`\nHigh-severity recent events.\n")
    lines.append(_curl("GET", "/ml/top_events"))
    lines.append("\n## /ml/network  `GET`\nCross-pool network features and lead/lag.\n")
    lines.append(_curl("GET", "/ml/network"))
    lines.append("\n## /policy/decide  `POST`\nReturns meta-controller policy decision.\n")
    lines.append(_curl("POST", "/policy/decide", {"feeds_fresh": True, "recent_forecasts": {"poolA": 0.62}}))
    lines.append("\n## /policy/retrain_check  `GET`\nSignals when to retrain models.\n")
    lines.append(_curl("GET", "/policy/retrain_check"))
    lines.append("\n## /policy/snapshot  `GET`\nReturns analyst note and nightly report pointers.\n")
    lines.append(_curl("GET", "/policy/snapshot"))
    lines.append("\n## /ops/healthz  `GET`\nProcess heartbeat.\n")
    lines.append(_curl("GET", "/ops/healthz"))
    lines.append("\n## /ops/readyz  `GET`\nDependency readiness (RPC, CSV write, models on disk).\n")
    lines.append(_curl("GET", "/ops/readyz"))
    md_path = out_dir / "contract.md"
    md_path.write_text("\n".join(lines))
    log_event("contract_export", {"json": str(spec_path), "md": str(md_path)})
    return {"json": str(spec_path), "md": str(md_path)}

In [251]:
from fastapi.responses import JSONResponse, PlainTextResponse

In [252]:
@app.get("/ops/contract.json", include_in_schema=False)
def ops_contract_json():
    try:
        p = (OUT / "contract" / "openapi.json")
        if not p.exists():
            export_contract_docs()
        return JSONResponse(json.loads(p.read_text()))
    except Exception:
        return JSONResponse({})
@app.get("/ops/contract.md", response_class=PlainTextResponse, include_in_schema=False)
def ops_contract_md():
    try:
        p = (OUT / "contract" / "contract.md")
        if not p.exists():
            export_contract_docs()
        return PlainTextResponse(p.read_text())
    except Exception:
        return PlainTextResponse("# contract not available\n")

In [253]:
def _midnight_utc_in(seconds: int = 5) -> float:
    now = datetime.now(timezone.utc)
    nxt = (now + timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)
    return max(1.0, (nxt - now).total_seconds() + seconds)
def _daily_housekeeping_loop():
    while True:
        try:
            export_contract_docs()             
            daily_zip_archives(30, 14)         
        except Exception as e:
            log_event("housekeeping_error", {"err": str(e)})
        time.sleep(_midnight_utc_in())
def start_housekeeping_daemon():
    t = threading.Thread(target=_daily_housekeeping_loop, name="housekeeping", daemon=True)
    t.start()
    log_event("housekeeping_started", {"thread": "housekeeping"})

In [254]:
try:
    start_housekeeping_daemon()
except Exception as e:
    log_event("housekeeping_start_error", {"err": str(e)})

  mtime = datetime.utcfromtimestamp(p.stat().st_mtime)
  mtime = datetime.utcfromtimestamp(p.stat().st_mtime)


In [255]:
def make_auth_headers(body_obj: Optional[dict] = None, api_key: Optional[str] = None, secret: Optional[str] = None) -> Dict[str, str]:
    """
    Standardize to the server-side rule in _auth_guard:
      signature = HMAC_SHA256(SECRET, f"{ts}.{body_json}")
    """
    ts = str(int(time.time()))
    body = json.dumps(body_obj) if body_obj is not None else ""
    sec = (secret or os.getenv("HMAC_SECRET", "secret")).encode()
    sig = hmac.new(sec, f"{ts}.{body}".encode(), hashlib.sha256).hexdigest()
    return {
        "X-API-Key": api_key or os.getenv("API_KEY", ""),
        "X-Timestamp": ts,
        "X-Signature": sig,
        "Content-Type": "application/json",
    }

In [256]:
try:
    export_contract_docs()
except Exception as e:
    log_event("contract_export_error", {"err": str(e)})

In [257]:
from __future__ import annotations
import os, re, json
from pathlib import Path

In [258]:
ENV_PATH = Path(".env")

In [259]:
def load_dotenv_if_exists():
    """Minimal .env loader that does NOT overwrite already-set env vars."""
    if ENV_PATH.exists():
        for line in ENV_PATH.read_text(encoding="utf-8").splitlines():
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            k, v = line.split("=", 1)
            k = k.strip()
            v = v.strip().strip('"').strip("'")
            os.environ.setdefault(k, v)

In [260]:
SECRET_PLACEHOLDERS = {"", "...", "YOUR_KEY", "YOUR_SECRET", "changeme", "<redacted>"}

In [261]:
SECRET_ENV_KEYS = {
    "HF_TOKEN", "HUGGINGFACE_HUB_TOKEN", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
    "SLACK_BOT_TOKEN", "DISCORD_BOT_TOKEN", "TELEGRAM_BOT_TOKEN",
    "ALERT_WEBHOOK", "SLACK_WEBHOOK", "DISCORD_WEBHOOK",
    "API_KEY", "HMAC_SECRET", "JWT_SECRET",
    "WEB3_RPC_URL", "ETH_RPC_URL", "BASE_RPC_URL", "ARBITRUM_RPC_URL",
}
ALIAS_GLOBAL_KEYS = {
    "WEBHOOK",  
}

In [262]:
def _should_skip_name(name: str) -> bool:
    if re.match(r"^_i\d*$", name):
        return True
    if name.startswith("__") and name.endswith("__"):
        return True
    return False
def _looks_like_secret(value: str) -> bool:
    """Heuristic patterns for tokens, secrets, webhooks; conservative on purpose."""
    if not isinstance(value, str) or len(value) < 12:
        return False
    pats = [
        r"^hf_[A-Za-z0-9]{20,}$",                 
        r"^sk-[A-Za-z0-9]{16,}",                  
        r"^xox[baprs]-[A-Za-z0-9-]{20,}",         
        r"https://hooks\.slack\.com/services/.+", 
        r"[A-Za-z0-9_\-]{24,}\.[A-Za-z0-9_\-]{6,}\.[A-Za-z0-9_\-]{10,}",  
    ]
    return any(re.search(p, value) for p in pats)

In [263]:
def scrub_secret(value: str, keep: int = 4) -> str:
    """Mask secrets for logs; URLs get their netloc & query masked."""
    if not isinstance(value, str) or not value:
        return ""
    if "://" in value:
        try:
            from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
            u = urlparse(value)
            qs = [(k, "***") for k, _ in parse_qsl(u.query, keep_blank_values=True)]
            u2 = u._replace(netloc="***", query=urlencode(qs))
            return urlunparse(u2)
        except Exception:
            return value.split("://", 1)[0] + "://***"
    return (value[:keep] + "…" + value[-keep:]) if len(value) > keep * 2 else "***"

In [264]:
def require_env(name: str):
    v = os.getenv(name, "")
    if not v or v in SECRET_PLACEHOLDERS:
        raise RuntimeError(f"[secrets] Missing required env var: {name}")
    return v
def _env_equals_global(name: str, gval: str) -> bool:
    """True if global value equals env value (likely set from env, not hard-coded)."""
    envv = os.getenv(name)
    return (envv is not None) and (envv == gval)

In [265]:
def _is_offender(name: str, gval: str) -> bool:
    """
    Decide if (name -> string value) looks like a hard-coded secret.
    Rules:
     - Only audit known secret names (SECRET_ENV_KEYS) + ALIAS_GLOBAL_KEYS.
     - Ignore placeholders.
     - If env has a value AND equals the global => OK (probably sourced from env).
     - If env has a value and global differs => offender (shadowing env with literal).
     - If env is empty and global is non-empty secret-looking => offender.
    """
    if name not in SECRET_ENV_KEYS and name not in ALIAS_GLOBAL_KEYS:
        return False
    if not isinstance(gval, str):
        return False
    if gval in SECRET_PLACEHOLDERS:
        return False
    env_key = "ALERT_WEBHOOK" if name == "WEBHOOK" else name
    envv = os.getenv(env_key, "")
    if envv and gval == envv:
        return False
    if envv and gval and gval != envv:
        return True
    if not envv and gval and _looks_like_secret(gval):
        return True
    if not envv and gval and "://" not in gval and len(gval) >= 20:
        return True
    return False

In [266]:
def _assert_no_inline_secrets(strict: bool = True):
    """
    Refuse to start if a known secret (by name) is hard-coded in globals.
    Avoids scanning arbitrary IPython variables like `_i140`.
    """
    offenders = []
    for key in sorted(SECRET_ENV_KEYS | ALIAS_GLOBAL_KEYS):
        if _should_skip_name(key):
            continue
        if key in globals():
            try:
                val = globals()[key]
            except Exception:
                continue
            if isinstance(val, str) and _is_offender(key, val):
                offenders.append(key)
    if offenders and strict:
        raise RuntimeError(
            "[secrets] Inline secret(s) detected for: "
            + ", ".join(offenders)
            + ". Move them to environment variables (e.g., .env) and read via os.getenv()."
        )
    elif offenders:
        print("[secrets][warn] Possible inline secrets:", offenders)

In [267]:
def get_env_secret(name: str, required: bool = False) -> str | None:
    v = os.getenv(name)
    if required and not v:
        raise RuntimeError(f"[secrets] Missing required env var: {name}")
    return v

In [268]:
try:
    load_dotenv_if_exists()
    _assert_no_inline_secrets(strict=True)
except Exception as _e:
    raise

In [269]:
def _log_rpc_url():
    try:
        rpc = os.getenv("WEB3_RPC_URL", "")
        if rpc:
            print("[rpc] using:", scrub_secret(rpc))
    except Exception:
        pass

In [270]:
ART_DIR = (OUT / "artifacts")
ART_DIR.mkdir(parents=True, exist_ok=True)
CORR_PNG = ART_DIR / "corr_sparkline.png"

In [271]:
def _pick_pool_pairs(pools: list[str]) -> tuple[str, str] | None:
    """Choose a sensible pair for correlation sparkline (USDC/USDT vs DAI/USDC, else first two)."""
    pri = [("USDC", "USDT"), ("DAI", "USDC"), ("USDT", "DAI")]
    def _has(sym, p): return sym in p.upper()
    for a, b in pri:
        candA = next((p for p in pools if _has(a, p) and _has("USDC", p) or _has("USDT", p)), None)
        candB = next((p for p in pools if _has(b, p) and (_has("USDC", p) or _has("DAI", p))), None)
        if candA and candB and candA != candB:
            return candA, candB
    return (pools[0], pools[1]) if len(pools) >= 2 else None

In [272]:
def save_corr_sparkline(win: int = 60) -> str | None:
    """
    Saves a very small sparkline of rolling correlation between two pools' dev series.
    Returns filepath or None.
    """
    import numpy as np, pandas as pd
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    df = load_live()
    if df is None or df.empty or "dev" not in df.columns:
        return None
    pools = list(df["pool"].dropna().unique())
    if len(pools) < 2:
        return None
    pair = _pick_pool_pairs(pools)
    if not pair:
        return None
    a, b = pair
    da = df[df["pool"] == a].sort_values("ts").tail(win * 4).reset_index(drop=True)
    db = df[df["pool"] == b].sort_values("ts").tail(win * 4).reset_index(drop=True)
    m = min(len(da), len(db))
    if m < win + 5:
        return None
    xa = pd.to_numeric(da["dev"].tail(m), errors="coerce").fillna(0.0).to_numpy()
    xb = pd.to_numeric(db["dev"].tail(m), errors="coerce").fillna(0.0).to_numpy()
    corr = []
    for i in range(win, m + 1):
        s1 = xa[i - win:i]; s2 = xb[i - win:i]
        c = np.corrcoef(s1, s2)[0, 1] if (np.std(s1) > 0 and np.std(s2) > 0) else 0.0
        corr.append(float(c))
    if not corr:
        return None
    plt.figure(figsize=(6, 1.6))
    plt.plot(corr)
    plt.axhline(0.4, linestyle="--", linewidth=0.8)
    plt.axhline(-0.4, linestyle="--", linewidth=0.8)
    plt.yticks([-1, -0.5, 0, 0.5, 1])
    plt.title(f"Rolling corr ({a} vs {b})")
    plt.tight_layout()
    plt.savefig(CORR_PNG, dpi=140)
    plt.close()
    return str(CORR_PNG)

In [273]:
def _band_and_why(risk_now: float, p10: float, p30: float, tail_df) -> tuple[str, str]:
    """
    Returns (band, why_string). 'why' references the factor(s) that set the band.
    """
    reasons = []
    if p10 >= 0.60 or p30 >= 0.60 or risk_now >= 0.90:
        band = "High"
        if p10 >= 0.60: reasons.append(f"p10={p10:.2f}")
        if p30 >= 0.60: reasons.append(f"p30={p30:.2f}")
        if risk_now >= 0.90: reasons.append(f"fused={risk_now:.2f}")
    elif p10 >= 0.35 or p30 >= 0.50 or risk_now >= 0.70:
        band = "Medium"
        if p10 >= 0.35: reasons.append(f"p10={p10:.2f}")
        if p30 >= 0.50: reasons.append(f"p30={p30:.2f}")
        if risk_now >= 0.70: reasons.append(f"fused={risk_now:.2f}")
    else:
        band = "Low"
        reasons.append(f"p10={p10:.2f}")
    sev = None
    try:
        if "event_severity_max_24h" in tail_df.columns:
            sev = float(tail_df["event_severity_max_24h"].fillna(0).max())
            if sev and sev > 0:
                reasons.append(f"event_sev={int(sev)}")
    except Exception:
        pass
    return band, f"band={band} due to " + ", ".join(reasons[:3])

In [274]:
def _tail_table_lines(n: int = 3) -> list[str]:
    df = load_live()
    if df is None or df.empty:
        return []
    try:
        scored10 = score_latest_10m(n_tail=min(60, len(df)), write_parquet=False)
    except Exception:
        scored10 = pd.DataFrame()
    try:
        scored30 = score_latest_30m(n_tail=min(60, len(df)), write_parquet=False)
    except Exception:
        scored30 = pd.DataFrame()
    tail = df.sort_values("ts").groupby("pool").tail(1)[["ts","pool","anom_fused"]].copy()
    if isinstance(scored10, pd.DataFrame) and not scored10.empty:
        tail = tail.merge(scored10[["ts","pool","risk_forecast_10m"]], on=["ts","pool"], how="left")
    if isinstance(scored30, pd.DataFrame) and not scored30.empty:
        tail = tail.merge(scored30[["ts","pool","risk_forecast_30m"]], on=["ts","pool"], how="left")
    def _level(r):
        fused = float(r.get("anom_fused", 0.0))
        p10 = float(r.get("risk_forecast_10m", 0.0) or 0.0)
        p30 = float(r.get("risk_forecast_30m", 0.0) or 0.0)
        if fused >= 0.90 or p10 >= 0.60 or p30 >= 0.60: return "red"
        if fused >= 0.70 or p10 >= 0.35 or p30 >= 0.50: return "orange"
        return "yellow"
    rows = []
    for _, r in tail.sort_values("anom_fused", ascending=False).head(n).iterrows():
        rows.append(f"{r['pool']} | {float(r['anom_fused']):.2f} | {float(r.get('risk_forecast_10m',0.0)):.2f} | {float(r.get('risk_forecast_30m',0.0)):.2f} | {_level(r)}")
    if not rows:
        return []
    hdr = "Pool | Fused | p10 | p30 | Level"
    sep = "--- | --- | --- | --- | ---"
    return [hdr, sep, *rows]

In [275]:
def build_analyst_note_v2() -> dict:
    """
    (Polished) Analyst Note:
      - Adds 'why' line (band rationale)
      - Only shows propagation cue if |corr| > 0.4 (and always if so)
      - Appends tiny tail table
      - Ensures sources line if recent events exist
      - Saves correlation sparkline PNG
    """
    import numpy as np, pandas as pd
    try:
        save_corr_sparkline(win=60)
    except Exception:
        pass
    tail_all = load_live().sort_values("ts")
    tail = tail_all.groupby("pool").tail(1)
    if tail.empty:
        return {"ok": False, "reason": "no data"}
    top3 = []
    try:
        if EXPLAIN_JSON.exists():
            explain = json.loads(EXPLAIN_JSON.read_text())
        else:
            explain = explain_forecast_10m()
        top3 = (explain or {}).get("top_contributors", [])[:3]
    except Exception:
        pass
    cue = None
    corr_val = 0.0
    try:
        net = compute_network_features()
        if isinstance(net, pd.DataFrame) and not net.empty:
            nrow = net.sort_values("neighbor_avg_anom", ascending=False).iloc[0]
            corr_val = float(nrow.get("corr_best", 0.0))
            if abs(corr_val) > 0.4:
                lead_str = "leads" if nrow["lead_lag_best"] > 0 else ("lags" if nrow["lead_lag_best"] < 0 else "co-moves")
                win = int(globals().get("_LAST_NET_WIN", 60))
                ml  = int(globals().get("_LAST_MAX_LAG", 10))
                cue = f"{nrow['pool']} {lead_str} peers (corr={corr_val:.2f}; win={win}, max_lag={ml})."
    except Exception:
        pass
    risk_now = float(tail.get("anom_fused", pd.Series([0.0])).max()) if "anom_fused" in tail.columns else 0.0
    p10 = 0.0; p30 = 0.0
    try:
        scored10 = score_latest_10m(n_tail=max(1, len(tail)), write_parquet=False)
        if isinstance(scored10, pd.DataFrame) and not scored10.empty:
            p10 = float(scored10["risk_forecast_10m"].max())
            risk_now = float(scored10["anom_fused"].max())
    except Exception:
        pass
    try:
        scored30 = score_latest_30m(n_tail=max(1, len(tail)), write_parquet=False)
        if isinstance(scored30, pd.DataFrame) and not scored30.empty:
            p30 = float(scored30["risk_forecast_30m"].max())
    except Exception:
        pass
    band, why = _band_and_why(risk_now, p10, p30, tail_all)
    try:
        freshness = "Fresh" if bool(tail["feeds_fresh"].iloc[-1]) else "Stale"
    except Exception:
        freshness = "Unknown"
    cites = []
    try:
        cites = _recent_event_citations(hours=48, top_n=3)
    except Exception:
        pass
    note_lines = [
        f"Fused anomaly now={risk_now:.2f}; 10-min risk={p10:.2f}; 30-min risk={p30:.2f}.",
        f"Freshness={freshness}. Confidence {band}. {why}."
    ]
    if top3:
        note_lines.append("Top drivers: " + "; ".join(top3))
    if cue:
        note_lines.append("Propagation: " + cue)
    if cites:
        note_lines.append("Sources: " + " | ".join(cites))
    note_lines.append("Action: monitor in 5m; if risk > 0.60 or 3 consecutive reds, engage mitigation (widen slippage / reroute).")
    note_tail = _tail_table_lines(n=3)
    if note_tail:
        note_lines.append(" | ".join(note_tail[0].split(" | ")))  
        for row in note_tail[2:4]:
            note_lines.append(row)
    note = " ".join(note_lines)[:900]
    try:
        pdf_path = export_analyst_note_pdf(
            note_text=note,
            risk_now=risk_now,
            risk_10m=p10,
            risk_30m=p30,
            contributors=top3,
            freshness=freshness,
            confidence=band,
            out_path=OUT / "analyst_note.pdf",
            title="Analyst Note v2"
        )
    except Exception:
        pdf_path = OUT / "analyst_note.txt"
        try: pdf_path.write_text(note)
        except Exception: pass
    return {"ok": True, "note": note, "pdf": str(pdf_path), "citations": cites, "corr_png": str(CORR_PNG) if CORR_PNG.exists() else None}

In [276]:
def _csv_slice_for_incident(n_tail: int = 200) -> Path | None:
    import pandas as pd
    df = load_live()
    if df is None or df.empty:
        return None
    use_cols = [c for c in [
        "ts","pool","dex_spot","dex_twap","oracle_px","oracle_ratio","dev","dev_roll_std",
        "tvl_outflow_rate","virtual_price","spot_twap_gap_bps","r0_delta","r1_delta",
        "neighbor_max_dev","neighbor_avg_anom","lead_lag_best","corr_best",
        "z_if","z_lof","z_ocsvm","z_cusum","z_ae","z_ae_seq",
        "anom_fused","risk_forecast_10m","risk_forecast_30m","feeds_fresh","y_10m","y_30m"
    ] if c in df.columns]
    sl = df.tail(n_tail)[use_cols].copy()
    p = OUT / "incident_tail.csv"
    try:
        sl.to_csv(p, index=False)
        return p
    except Exception:
        return None

In [277]:
def write_incident_pack(snapshot_md: Path | str, extra_files: list[str | Path] | None = None) -> Path | None:
    """
    Packs: incident_{ts}.md + analyst_note.pdf + explain*.json + tail CSV (+ artifacts).
    Returns path to the ZIP file.
    """
    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
    zpath = OUT / f"incident_pack_{ts}.zip"
    files = []
    snap = Path(snapshot_md)
    if snap.exists():
        files.append(("incident.md", snap))
    note_pdf = OUT / "analyst_note.pdf"
    if note_pdf.exists():
        files.append(("analyst_note.pdf", note_pdf))
    for f in [globals().get("EXPLAIN_JSON", OUT / "explain_10m.json"),
              globals().get("EXPLAIN_30M_JSON", OUT / "explain_30m.json")]:
        try:
            f = Path(f)
            if f.exists():
                files.append((f.name, f))
        except Exception:
            pass
    for pth in [OUT / "artifacts" / "calibration_10m.png",
                OUT / "artifacts" / "calibration_30m.png",
                OUT / "artifacts" / "detector_pr_auc.png",
                CORR_PNG]:
        if Path(pth).exists():
            files.append((Path(pth).name, Path(pth)))
    sl = _csv_slice_for_incident(n_tail=200)
    if sl:
        files.append((Path(sl).name, Path(sl)))
    for e in (extra_files or []):
        p = Path(e)
        if p.exists():
            files.append((p.name, p))
    if not files:
        return None
    with zipfile.ZipFile(zpath, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for arcname, path in files:
            try:
                zf.write(path, arcname)
            except Exception:
                pass
    return zpath

In [278]:
def save_all_charts_for_report():
    try:
        save_all_calibration_artifacts()
    except Exception:
        pass
    try:
        save_corr_sparkline(win=60)
    except Exception:
        pass

In [279]:
import os, json
from pathlib import Path
import numpy as np
import pandas as pd
import pytest

In [280]:
def _mk_live(tmp):
    out = Path(tmp) / "outputs"
    out.mkdir(parents=True, exist_ok=True)
    live = out / "live_dataset.csv"
    ts = pd.date_range("2025-01-01", periods=25, freq="min", tz="UTC")
    pools = ["USDC/USDT-uni", "DAI/USDC-curve"]
    rows = []
    for p in pools:
        dev = np.linspace(0, 0.008, len(ts))  
        rows += [{"ts": t, "pool": p, "dev": d, "anam": 0.0, "dex_spot": 1.0, "feeds_fresh": True} for t, d in zip(ts, dev)]
    df = pd.DataFrame(rows)
    df["anom_fused"] = 0.0
    df.to_csv(live, index=False)
    return out, live
@pytest.fixture()
def monkey_env(tmp_path, monkeypatch):
    out, live = _mk_live(tmp_path)
    monkeypatch.setenv("MOCK_MODE", "1")
    monkeypatch.setenv("OUT", str(out))
    monkeypatch.setenv("LIVE_CSV", str(live))
    return out, live

In [281]:
def test_feature_engineering_smoke(monkey_env):
    out, live = monkey_env
    import sys
    sys.path.append(str(Path("."))) 
    df = pd.read_csv(live, parse_dates=["ts"])
    assert {"ts", "pool", "dev"}.issubset(df.columns)
    try:
        from sentinel_runtime import compute_network_features  
    except Exception:
        compute_network_features = None
    if compute_network_features:
        res = compute_network_features(win=20, max_lag=5)
        assert set(["pool", "neighbor_max_dev", "neighbor_avg_anom", "lead_lag_best", "corr_best"]).issubset(res.columns)
def test_anomaly_fusion_monotone(monkey_env):
    out, live = monkey_env
    df = pd.read_csv(live, parse_dates=["ts"])
    df["z_if"] = [0.1, 0.2] * (len(df)//2) + [0.1] * (len(df) % 2)
    df["z_lof"] = 0.05
    df["z_ocsvm"] = 0.0
    df["anom_fused"] = df[["z_if", "z_lof", "z_ocsvm"]].max(axis=1)
    df.to_csv(live, index=False)
    assert (df["anom_fused"] >= df["z_if"]).all()
    assert (df["anom_fused"] >= df["z_lof"]).all()

In [282]:
def test_labeler_future_window(monkey_env):
    out, live = monkey_env
    df = pd.read_csv(live, parse_dates=["ts"])
    mask = (df["pool"] == "USDC/USDT-uni")
    idx = df[mask].index[-8]  
    df.loc[idx, "dev"] = 0.006
    df.to_csv(live, index=False)
    try:
        from sentinel_runtime import ensure_labels_fixed_on_live, load_live
        ensure_labels_fixed_on_live(dev_thr=0.005, fused_thr=0.90, h10=10, h30=30, persist_version=False)
        lf = load_live()
        assert "y_10m" in lf.columns

        assert lf[lf["pool"] == "USDC/USDT-uni"]["y_10m"].max() == 1
    except Exception:
        df = pd.read_csv(live, parse_dates=["ts"])
        df = df.sort_values(["pool", "ts"])
        y10 = []
        for p, g in df.groupby("pool"):
            vals = (g["dev"].abs() >= 0.005).to_numpy().astype(int)
            fut = np.zeros_like(vals)
            for j in range(len(vals)):
                fut[j] = 1 if vals[j+1:j+11].any() else 0
            y10 += list(fut)
        df["y_10m"] = y10
        assert df[df["pool"] == "USDC/USDT-uni"]["y_10m"].max() == 1