In [None]:
# @title
# ======================================================================
# POLYMARKET ORACLE v2.6 (Clean & Silent)
# Changes:
#  - FIXED: UserWarning spam (matches feature names for prediction)
#  - TUNED: Relaxed "Crossed Market" buffer (Bid > Ask + 0.005)
#  - OPTIMIZED: Faster live scan loop
# ======================================================================

!pip install -q requests pandas numpy scikit-learn tqdm

import time, json, math, concurrent.futures, warnings
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import Counter
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

import requests
import numpy as np
import pandas as pd

from tqdm.auto import tqdm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

# Silence sklearn warnings.
# NOTE: this ignores every UserWarning issued from modules whose name starts
# with "sklearn", not just a single message.
warnings.filterwarnings("ignore", category=UserWarning, module="sklearn")

# Base URLs of the three HTTP APIs queried by this script.
# Used for the `/markets` listings (closed markets for training, open for live).
GAMMA_API = "https://gamma-api.polymarket.com"
# Used for `/prices-history` (training) and `/prices` (live quotes).
CLOB_API  = "https://clob.polymarket.com"
# Used for `/holders`, `/closed-positions` and `/value` (wallet audit).
DATA_API  = "https://data-api.polymarket.com"

@dataclass
class OracleConfig:
    """Tunable parameters for model training, live scanning and the wallet audit."""

    # ---- Training ----
    # Upper bound on the number of closed markets kept as training candidates.
    train_fetch_markets_max: int = 2500
    # Size of the thread pool that downloads per-market price histories.
    train_threads: int = 16

    # Labelling thresholds on a market's final price: >= label_hi gives label 1,
    # <= label_lo gives label 0; markets ending in between are discarded.
    label_hi: float = 0.85
    label_lo: float = 0.15

    # ---- Live ----
    # `limit` parameter sent with the open-markets request.
    live_limit: int = 150
    # Markets whose `liquidityNum` is below this value are skipped.
    min_live_liquidity: float = 500
    # Minimum spread-penalised edge a market needs to be reported.
    min_live_edge: float = 0.015
    # Fraction of the quoted spread subtracted from the raw edge.
    spread_penalty: float = 0.15

    # ---- Wallets ----
    # A wallet's reported value must exceed this to be listed.
    min_total_value: float = 2500.0
    # Wallets with fewer closed positions than this are ignored.
    min_closed_positions: int = 5
    # `limit` parameter sent with each holders request.
    holders_limit: int = 100

    # Request timeout in seconds — presumably meant for the HTTP calls;
    # verify which call sites actually honour it.
    timeout: int = 5

# Module-wide configuration instance built from the defaults above.
cfg = OracleConfig()

# Robust Session
# A single Session is shared by every request in this script. The adapter is
# configured with pool sizes of 20 and a retry policy of up to 3 retries with
# a backoff factor of 0.5.
sess = requests.Session()
adapter = HTTPAdapter(pool_connections=20, pool_maxsize=20, max_retries=Retry(total=3, backoff_factor=0.5))
# Only URLs starting with https:// are routed through this adapter.
sess.mount("https://", adapter)

# ----------------------------------------------------------------------
# HELPERS
# ----------------------------------------------------------------------
def _safe_logit(p): return math.log(min(max(p, 1e-6), 1-1e-6) / (1 - min(max(p, 1e-6), 1-1e-6)))

def _get_price(hist, ts):
    if not hist: return None
    lo, hi = 0, len(hist)-1
    best = None
    while lo <= hi:
        mid = (lo+hi)//2
        t = int(hist[mid].get("t", 0))
        if t <= ts:
            best = hist[mid]
            lo = mid+1
        else: hi = mid-1
    return float(best.get("p")) if best else None

def parse_iso(s):
    """Convert an ISO-8601 date/time string to integer Unix seconds.

    A trailing ``Z`` is accepted as UTC.  Strings that carry no UTC offset
    (including date-only values such as ``"2025-01-31"``) are interpreted as
    UTC, so the result does not depend on the local timezone of the machine
    running the script.

    Args:
        s: ISO-8601 string.

    Returns:
        Seconds since the Unix epoch as an int, or None when ``s`` is not a
        string or cannot be parsed.
    """
    from datetime import datetime, timezone

    try:
        # fromisoformat() only understands a literal "Z" suffix on newer
        # Python versions, so normalise it to an explicit offset first.
        parsed = datetime.fromisoformat(s.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        return None

    if parsed.tzinfo is None:
        # Naive value: pin it to UTC instead of the local timezone.
        parsed = parsed.replace(tzinfo=timezone.utc)

    try:
        return int(parsed.timestamp())
    except (OverflowError, OSError, ValueError):
        return None

# ----------------------------------------------------------------------
# PIPELINE
# ----------------------------------------------------------------------
def build_model(cfg):
    """Train a resolution classifier from recently closed two-outcome markets.

    Steps:
      1. Page through closed markets on the Gamma API (500 per page, newest
         ``updatedAt`` first) until ``cfg.train_fetch_markets_max`` candidates
         carrying ``clobTokenIds`` and ``endDateIso`` are collected, or the
         API returns nothing usable.
      2. For each candidate, download the 7 days of price history before its
         end date for the first token and label the market 1 when the final
         price is ``>= cfg.label_hi``, 0 when it is ``<= cfg.label_lo``;
         anything in between is dropped.
      3. Emit one row per horizon (6h, 24h, 72h before the end) with the
         features ``p``, ``logit_p``, ``d24`` (change vs. 24h earlier) and
         ``ttc`` (the horizon in hours).  Prices outside (0.02, 0.98) are
         skipped.
      4. Fit a random forest on roughly 85% of the *markets* and print the
         AUC measured on the held-out markets.

    The train/validation split is done per market rather than per row: all
    horizons of a market share one label, so a row-level split would put the
    same market on both sides and overstate the validation AUC.

    Args:
        cfg: configuration object; reads ``train_fetch_markets_max``,
            ``train_threads``, ``label_hi``, ``label_lo`` and ``timeout``.

    Returns:
        The fitted ``RandomForestClassifier``, or None when no training rows
        could be built.
    """
    feature_cols = ["p", "logit_p", "d24", "ttc"]
    page_size = 500

    print(f"[train] Fetching metadata for {cfg.train_fetch_markets_max} markets...")
    cands = []
    off = 0
    while len(cands) < cfg.train_fetch_markets_max:
        try:
            r = sess.get(f"{GAMMA_API}/markets", params={
                "closed": "true", "limit": page_size, "offset": off,
                "order": "updatedAt", "ascending": "false"
            }, timeout=cfg.timeout)
            batch = r.json() if r.status_code == 200 else []
        except (requests.RequestException, ValueError):
            batch = []

        # An empty page, a failed request or a non-list payload ends paging.
        if not isinstance(batch, list) or not batch:
            break
        cands.extend(
            m for m in batch
            if isinstance(m, dict) and m.get("clobTokenIds") and "endDateIso" in m
        )
        off += page_size
        time.sleep(0.1)

    cands = cands[:cfg.train_fetch_markets_max]
    print(f"[train] Deep scanning history for {len(cands)} candidates...")

    def worker(m):
        """Return the feature rows for one market, or [] when it is unusable."""
        try:
            end = parse_iso(m["endDateIso"])
            if end is None:
                return []
            tids = json.loads(m["clobTokenIds"])
            if len(tids) != 2:
                return []

            hist = []
            for fid in (60, 0):  # Fidelity fallback
                try:
                    r = sess.get(f"{CLOB_API}/prices-history", params={
                        "market": tids[0], "startTs": end - 7*86400, "endTs": end, "fidelity": fid
                    }, timeout=cfg.timeout)
                    d = r.json().get("history", [])
                except (requests.RequestException, ValueError, AttributeError):
                    continue
                if d:
                    hist = d
                    break

            if not hist:
                return []

            p_end = _get_price(hist, end)
            if p_end is None:
                return []

            if p_end >= cfg.label_hi:
                label = 1
            elif p_end <= cfg.label_lo:
                label = 0
            else:
                return []

            rows = []
            for h in (6, 24, 72):
                t = end - h*3600
                p = _get_price(hist, t)
                if p is None or not (0.02 < p < 0.98):
                    continue
                p24 = _get_price(hist, t - 86400)
                if p24 is None:
                    # No earlier observation: treat the 24h change as zero.
                    # (Tested with `is None` so a genuine 0.0 price is kept.)
                    p24 = p
                rows.append([p, _safe_logit(p), p - p24, float(h), label])
            return rows
        except Exception:
            # Best effort: one malformed market must not abort the whole build.
            return []

    data = []
    groups = []  # one market id per row, aligned with `data`
    with ThreadPoolExecutor(cfg.train_threads) as exe:
        futs = [exe.submit(worker, m) for m in cands]
        for gid, f in enumerate(tqdm(as_completed(futs), total=len(futs), desc="Building")):
            market_rows = f.result()
            data.extend(market_rows)
            groups.extend([gid] * len(market_rows))

    if not data:
        return None

    df = pd.DataFrame(data, columns=feature_cols + ["y"])
    print(f"[train] {len(df)} rows collected.")

    X = df[feature_cols]
    y = df["y"]

    # Hold out ~15% of the markets (not rows), chosen with a fixed seed.
    group_ids = np.asarray(groups)
    shuffled = np.random.default_rng(0).permutation(np.unique(group_ids))
    n_train = max(1, int(len(shuffled) * 0.85))
    train_mask = np.isin(group_ids, shuffled[:n_train])

    clf = RandomForestClassifier(n_estimators=100, max_depth=5, n_jobs=-1)
    clf.fit(X.loc[train_mask], y.loc[train_mask])

    X_val, y_val = X.loc[~train_mask], y.loc[~train_mask]
    # AUC needs both classes in the validation labels and in the fitted model.
    if y_val.nunique() > 1 and len(clf.classes_) > 1:
        auc = roc_auc_score(y_val, clf.predict_proba(X_val)[:, 1])
        print(f"[train] Val AUC: {auc:.3f}")

    return clf

def scan(clf, cfg):
    """Score open markets and return those whose modelled edge clears the bar.

    Open markets are fetched from the Gamma API, filtered by liquidity and by
    having exactly two CLOB tokens, and quoted in batches of 200 requests via
    the CLOB ``/prices`` endpoint.  For each market the YES mid price is fed to
    the model (with ``d24`` fixed at 0.0 and ``ttc`` fixed at 24.0) and the
    edge of buying YES or NO at the ask is computed, reduced by
    ``cfg.spread_penalty`` times the quoted spread.

    A per-reason count of rejected markets is printed as "REJECTION REPORT".

    Args:
        clf: fitted classifier exposing ``predict_proba``, or None to fall
            back to the mid price as the probability.
        cfg: configuration object; reads ``live_limit``,
            ``min_live_liquidity``, ``min_live_edge``, ``spread_penalty`` and
            ``timeout``.

    Returns:
        DataFrame sorted by ``Edge`` descending with columns ``Question``,
        ``Side``, ``Edge``, ``Model``, ``Exec``, ``Spread`` and ``cid``; an
        empty DataFrame when nothing qualifies.
    """
    print("\n[live] Scanning...")
    try:
        r = sess.get(
            f"{GAMMA_API}/markets",
            params={"closed": "false", "limit": cfg.live_limit},
            timeout=cfg.timeout,
        )
        mkts = r.json() if r.status_code == 200 else []
    except (requests.RequestException, ValueError):
        mkts = []
    if not isinstance(mkts, list):
        mkts = []

    reqs = []
    meta = []
    stats = Counter()

    for m in mkts:
        if float(m.get("liquidityNum") or 0) < cfg.min_live_liquidity:
            stats["low_liq"] += 1
            continue
        try:
            tids = json.loads(m["clobTokenIds"])
            if len(tids) != 2:
                continue
        except (KeyError, TypeError, ValueError):
            continue
        meta.append((m, tids))
        for tid in tids:
            reqs.append({"token_id": tid, "side": "BUY"})
            reqs.append({"token_id": tid, "side": "SELL"})

    pmap = {}
    for i in range(0, len(reqs), 200):
        try:
            r = sess.post(f"{CLOB_API}/prices", json=reqs[i:i+200], timeout=cfg.timeout)
            if r.status_code == 200:
                payload = r.json()
                if isinstance(payload, dict):
                    pmap.update(payload)
        except (requests.RequestException, ValueError):
            pass
        time.sleep(0.1)

    def get_quote(token_id):
        """Return (bid, ask) for a token; ask is 999.0 when none is quoted."""
        # NOTE(review): bid is read from the "BUY" quote and ask from "SELL".
        # The opposite mapping yielded a negative spread on every reported
        # row, which cannot happen with a real bid/ask pair — confirm against
        # the CLOB pricing documentation.
        q = pmap.get(token_id) or {}
        bid = float(q.get("BUY") or 0)
        ask_raw = q.get("SELL")
        ask = float(ask_raw) if ask_raw and float(ask_raw) > 0 else 999.0
        return bid, ask

    out = []
    for m, (t0, t1) in meta:
        try:
            b0, a0 = get_quote(t0)
            b1, a1 = get_quote(t1)
        except (AttributeError, TypeError, ValueError):
            stats["bad_quote"] += 1
            continue

        # Relaxed Crossed Logic
        # Only reject if Bid is STRICTLY > Ask + buffer (0.005) AND Ask is valid
        if b0 > (a0 + 0.005) and a0 < 2:
            stats["crossed_market"] += 1
            continue

        if a0 > 1 and a1 > 1:
            stats["no_sellers"] += 1
            continue

        # Without a YES ask, assume one 0.05 above the bid to form a mid.
        real_a0 = a0 if a0 < 2 else b0 + 0.05
        mid = (b0 + real_a0)/2

        if clf is not None:
            # Pass a DataFrame with feature names so sklearn does not warn.
            row = pd.DataFrame(
                [[mid, _safe_logit(mid), 0.0, 24.0]],
                columns=["p", "logit_p", "d24", "ttc"],
            )
            prob = clf.predict_proba(row)[:, 1][0]
        else:
            prob = mid

        # Edge of buying at the ask, less a penalty proportional to the
        # spread; -1 marks a side that has no ask at all.
        e0 = (prob - a0) - cfg.spread_penalty*(a0-b0) if a0 < 2 else -1
        e1 = ((1-prob) - a1) - cfg.spread_penalty*(a1-b1) if a1 < 2 else -1

        best = max(e0, e1)
        if best < cfg.min_live_edge:
            stats["no_edge"] += 1
            continue

        side = "YES" if e0 > e1 else "NO"
        out.append({
            # A market without a question must not crash the scan.
            "Question": (m.get("question") or "")[:40],
            "Side": side,
            "Edge": round(best, 3),
            "Model": round(prob, 2),
            "Exec": round(a0 if side == "YES" else a1, 2),
            "Spread": round((a0-b0) if side == "YES" else (a1-b1), 3),
            "cid": m.get("conditionId")
        })

    print("REJECTION REPORT:", dict(stats))
    if not out:
        return pd.DataFrame()
    return pd.DataFrame(out).sort_values("Edge", ascending=False)

def wallets(df, cfg):
    """Print the strongest holders found in the top-ranked markets.

    Collects the ``proxyWallet`` addresses of holders of the first five
    markets in ``df``, then for up to 50 of them fetches closed positions and
    the reported value.  A wallet is listed when it has at least
    ``cfg.min_closed_positions`` closed positions, a ratio of mean to standard
    deviation of realised PnL above 0.05, and a value above
    ``cfg.min_total_value``.  The five best by that ratio are printed.

    Args:
        df: scan result; must contain a ``cid`` column.  Nothing happens when
            it is empty.
        cfg: configuration object; reads ``holders_limit``,
            ``min_closed_positions``, ``min_total_value`` and ``timeout``.

    Returns:
        None; results are printed.
    """
    if df.empty:
        return
    print(f"\n[wallet] Auditing top {min(len(df), 5)} markets...")

    users = set()
    for cid in df["cid"].head(5):
        try:
            r = sess.get(
                f"{DATA_API}/holders",
                params={"market": cid, "limit": cfg.holders_limit},
                timeout=cfg.timeout,  # without a timeout a stalled call hangs forever
            )
            for g in r.json():
                for h in g.get("holders", []):
                    if h.get("proxyWallet"):
                        users.add(h["proxyWallet"])
        except (requests.RequestException, ValueError, AttributeError, TypeError):
            continue

    # Sort before truncating so the audited sample is reproducible; set
    # iteration order for strings changes from one process to the next.
    sample = sorted(users, key=str)[:50]
    print(f"[wallet] Checking {len(sample)} of {len(users)} holders...")

    res = []
    for u in tqdm(sample):
        try:
            r = sess.get(
                f"{DATA_API}/closed-positions",
                params={"user": u, "limit": 30},
                timeout=cfg.timeout,
            )
            hist = r.json()
            if not isinstance(hist, list) or len(hist) < cfg.min_closed_positions:
                continue

            pnls = [float(p.get("realizedPnl") or 0) for p in hist]
            pnl_std = np.std(pnls)
            if pnl_std == 0:
                continue
            sharpe = np.mean(pnls) / pnl_std

            val_r = sess.get(f"{DATA_API}/value", params={"user": u}, timeout=cfg.timeout)
            val = float(val_r.json()[0].get("value") or 0)

            if val > cfg.min_total_value and sharpe > 0.05:
                res.append({"User": u, "Sharpe": round(sharpe, 2), "Value": int(val)})
        except (requests.RequestException, ValueError, TypeError,
                AttributeError, LookupError, OverflowError):
            # Best effort: skip wallets whose data is missing or malformed.
            continue

    if res:
        print(pd.DataFrame(res).sort_values("Sharpe", ascending=False).head(5).to_string(index=False))
    else:
        print("No smart wallets found.")

# Run: train the model, scan live markets, then audit wallets if trades exist.
clf = build_model(cfg)
df = scan(clf, cfg)
if df.empty:
    print("No valid trades found.")
else:
    # Show the ten best candidates without the internal condition id.
    print(df.drop(columns=["cid"]).head(10).to_string(index=False))
    wallets(df, cfg)

[train] Fetching metadata for 2500 markets...
[train] Deep scanning history for 2500 candidates...


Building:   0%|          | 0/2500 [00:00<?, ?it/s]

[train] 864 rows collected.
[train] Val AUC: 0.961

[live] Scanning...
REJECTION REPORT: {'low_liq': 3, 'no_edge': 114, 'crossed_market': 24}
                                Question Side  Edge  Model  Exec  Spread
Will OpenAI have the top AI model on Dec   NO 0.078    0.0  0.92  -0.002
Will Bitcoin dip to $70,000 by December    NO 0.040    0.0  0.96  -0.005
               Weed rescheduled in 2025?   NO 0.031    0.0  0.97  -0.001
     Russia x Ukraine ceasefire in 2025?   NO 0.029    0.0  0.97  -0.001
Will xAI have the top AI model on Decemb   NO 0.026    0.0  0.97  -0.001
Will A Minecraft Movie be the top grossi  YES 0.020    1.0  0.98  -0.002
         Harry & Meghan divorce in 2025?   NO 0.016    0.0  0.98  -0.003
Will 'Avatar: Fire and Ash' have the bes   NO 0.016    0.0  0.98  -0.003
      Tim Cook out as Apple CEO in 2025?   NO 0.015    0.0  0.98  -0.004

[wallet] Auditing top 5 markets...
[wallet] Checking 187 holders...


  0%|          | 0/50 [00:00<?, ?it/s]

                                      User  Sharpe   Value
0x2a019dc0089ea8c6edbbafc8a7cc9ba77b4b6397    3.48  152820
0x090a0d3fc9d68d3e16db70e3460e3e4b510801b4    2.64 1174684
0x37e4728b3c4607fb2b3b205386bb1d1fb1a8c991    2.39  191320
0x6204bad10670eede693eaaf96c039d751298421a    1.69   17280
0x2c3928af89565c352afe8e2a1b25deed77a056dc    1.48   25785
