INCLUDES

In [12]:
import pandas as pd
from pathlib import Path
import numpy as np

CONFIGS

In [13]:
POLYMARKET_MARKETS = []
POLYMARKET_MARKETS.append("0x4319532e181605cb15b1bd677759a3bc7f7394b2fdf145195b700eeaedfd5221")
POLYMARKET_MARKETS.append("0xfa48a99317daef1654d5b03e30557c4222f276657275628d9475e141c64b545d")

#Path to data from this jupyter notebook
DATA_DIR = Path("..") / ".." / "data" / "raw"

STRATEGY_PARAMS

In [28]:
spread_base = 0.02      # base spread 2 cents
vol_target = 0.01      # or can be changed to median over volatility      
vol_alpha = 0.3         # alpha from strategy notebook

window_trades = 20      # last window trades to compute volatility, dont know how to choose this parameter optimaly

Q_max = 500            # not sure how to set this parameter yet
inv_beta = 0.1          


In [25]:
def load_polymarket_timeseries(condition_id: str) -> pd.DataFrame:
    """
        DataFrame sorted by timestamp with additional columns:
        - timestamp_datetime
        - mid_price
        - mid_return
        - rolling_vol_window_trades
        - signed_trade_size
        - rolling_net_flow_window_trades
    """

    path = DATA_DIR / f"trades_{condition_id}.jsonl"
    df = pd.read_json(path, lines=True)

    df["timestamp_datetime"] = pd.to_datetime(df["timestamp"], unit="s", utc=True)
    df = df.sort_values("timestamp_datetime").reset_index(drop=True) #probably already sorted, but just in case


    df["mid_price"] = df["price"].astype(float)


    df["rolling_vol_window_trades"] = (
        df["mid_price"].pct_change()
        .rolling(window_trades)
        .std()
    )

    df["rolling_net_flow_window_trades"] = (
        df["size"]
        .where(df["side"] == "BUY", -df["size"])
        .rolling(window_trades)
        .sum()
    )


    return df

In [26]:
def simulate_mm_for_market(condition_id: str):
    df = load_polymarket_timeseries(condition_id)


    inventory = 0.0
    cash = 0.0
    records = []

    for row in df.itertuples():
        mid_t = row.mid_price

        vol_t = row.rolling_vol_window_trades
        if pd.isna(vol_t) or vol_t <= 0:
            vol_t = vol_target
#

        spread_t = spread_base * (1.0 + vol_alpha * (vol_t / vol_target))


        inv_pressure = inventory / Q_max   
        inv_shift = inv_beta * inv_pressure                          

        mid_real = mid_t - inv_shift

        bid = mid_real - spread_t / 2
        ask = mid_real + spread_t / 2

        # as it is a probability so we can cut values
        bid = max(bid, 0.01)
        ask = min(ask, 0.99)

        side = row.side
        size = float(row.size)
        fill_side = "none"


        if side == "BUY":
            if inventory - size >= -Q_max:
                cash += ask * size
                inventory -= size
                fill_side = "sell"


        elif side == "SELL":
            if inventory + size <= Q_max:
                cash -= bid * size
                inventory += size
                fill_side = "buy"



        pnl = cash + inventory * mid_t

        records.append({
            "timestamp_datetime": row.timestamp_datetime,
            "mid_price": mid_t,
            "bid": bid,
            "ask": ask,
            "spread": spread_t,
            "inventory": inventory,
            "pnl": pnl,
            "trade_side": side,
            "trade_size": size,
            "fill_side": fill_side,
            "rolling_vol_window_trades": vol_t,
            "rolling_net_flow_window_trades": row.rolling_net_flow_window_trades,
        })

    sim_df = pd.DataFrame(records)

    stats = {
        "net_pnl": sim_df["pnl"].iloc[-1],
        "max_inventory_abs": sim_df["inventory"].abs().max(),
        "avg_spread": sim_df["spread"].mean(),
        "total_trades": len(df),
        "total_fills": (sim_df["fill_side"] != "none").sum(),
    }

    print(f"Market: {condition_id}")
    print(f"Net PnL: {stats['net_pnl']:.2f}")
    print(f"Max |inventory|: {stats['max_inventory_abs']:.2f}")
    print(f"Average spread quoted: {stats['avg_spread']:.4f}")
    print(f"Total trades in dataset: {stats['total_trades']}")
    print(f"Total fills (we interacted with taker flow): {stats['total_fills']}")

    return sim_df, stats

In [29]:
for condition_id in POLYMARKET_MARKETS:
    simulate_mm_for_market(condition_id)

Market: 0x4319532e181605cb15b1bd677759a3bc7f7394b2fdf145195b700eeaedfd5221
Net PnL: 9763.34
Max |inventory|: 497.50
Average spread quoted: 17.7889
Total trades in dataset: 500
Total fills (we interacted with taker flow): 404
Market: 0xfa48a99317daef1654d5b03e30557c4222f276657275628d9475e141c64b545d
Net PnL: 11578.87
Max |inventory|: 491.55
Average spread quoted: 7.3596
Total trades in dataset: 502
Total fills (we interacted with taker flow): 434
