# Polymarket Time Series Model Comparison

This notebook evaluates multiple forecasting models on NBA market price time series and summarizes performance with metrics and plots.

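Models: Naive, Moving Average, ARIMA, Prophet, LSTM, and Transformer. ARIMA, Prophet, LSTM, and Transformer are optional: each is skipped when its dependency (statsmodels, prophet/fbprophet, or torch) is not installed or when fitting fails.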


In [None]:
from pathlib import Path
import json
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Paths
# NOTE: these are relative paths, so they resolve against the current working directory.
TS_DATA_DIR = Path('notebooks/timeseries_analysis/data')
MARKETS_JSONL = TS_DATA_DIR / 'nba_markets.jsonl'  # read line by line as JSON by load_markets()
PRICES_JSONL = TS_DATA_DIR / 'nba_prices_history.jsonl'  # read line by line as JSON by load_prices()
OUT_DIR = TS_DATA_DIR / 'analysis'  # destination of the CSV files written in the last cell
OUT_DIR.mkdir(parents=True, exist_ok=True)  # created up front; no error if it already exists

# Parameters
TRAIN_FRAC = 0.8  # leading fraction of each series used for training; the rest is the test set
MIN_POINTS = 30  # minimum number of daily observations a series needs to be kept
MAX_MARKETS_EVAL = 50  # batch evaluation keeps only this many of the longest series
DEEP_MODEL_MAX_MARKETS = 10  # LSTM/Transformer run only on this many of the longest series
LOOKBACK = 10  # default window length (in observations) fed to the LSTM/Transformer
EPOCHS = 20  # training epochs for the LSTM/Transformer
BATCH_SIZE = 32  # mini-batch size for the LSTM/Transformer
SEED = 1337
np.random.seed(SEED)  # seeds NumPy's global RNG only


In [None]:
def load_markets():
    """Load the markets JSONL file into a DataFrame.

    Every non-blank line of ``MARKETS_JSONL`` is parsed as one JSON document
    and becomes one row of the returned DataFrame.
    """
    with MARKETS_JSONL.open('r', encoding='utf-8') as handle:
        records = [json.loads(raw) for raw in handle if raw.strip()]
    return pd.DataFrame(records)

def load_prices():
    """Load the price-history JSONL file as a list of parsed JSON records.

    Blank lines in ``PRICES_JSONL`` are ignored; every other line is parsed
    with ``json.loads`` and returned in file order.
    """
    with PRICES_JSONL.open('r', encoding='utf-8') as handle:
        return [json.loads(raw) for raw in handle if raw.strip()]

def expand_history(price_rows):
    """Flatten per-market price histories into one long DataFrame.

    Parameters
    ----------
    price_rows : iterable of dict
        Records with a ``'market_id'`` and a ``'history'`` list. Each history
        point is either a dict with ``'t'`` / ``'p'`` keys or a list/tuple
        whose first two items are ``(t, p)``. ``t`` is interpreted as Unix
        seconds.

    Returns
    -------
    pandas.DataFrame
        Columns ``market_id``, ``timestamp`` (UTC) and ``price`` (float),
        sorted by market and timestamp. Points with a missing/unparseable
        timestamp or a missing/non-numeric/non-finite price are skipped
        instead of aborting the whole load. An empty frame with the same
        columns is returned when nothing survives.
    """
    columns = ['market_id', 'timestamp', 'price']
    expanded = []
    for row in price_rows:
        m_id = row.get('market_id')
        # `or []` also covers records where 'history' is present but null.
        for point in row.get('history') or []:
            if isinstance(point, dict):
                t, p = point.get('t'), point.get('p')
            elif isinstance(point, (list, tuple)) and len(point) >= 2:
                t, p = point[0], point[1]
            else:
                continue
            if t is None or p is None:
                continue
            try:
                price = float(p)
            except (TypeError, ValueError):
                # One malformed price must not discard every other point.
                continue
            if not math.isfinite(price):
                continue
            expanded.append({
                'market_id': m_id,
                'timestamp': pd.to_datetime(t, unit='s', utc=True, errors='coerce'),
                'price': price,
            })
    if not expanded:
        return pd.DataFrame(columns=columns)
    df = pd.DataFrame(expanded, columns=columns)
    # errors='coerce' turns unparseable timestamps into NaT; drop those rows.
    df = df.dropna(subset=['timestamp'])
    return df.sort_values(['market_id', 'timestamp'])

def build_series(df):
    """Build one daily price series per market.

    For each ``market_id`` group the rows are ordered by timestamp and the
    group is skipped when it has fewer than two distinct timestamps or when
    its price is constant at exactly 0.5. Remaining groups are resampled to
    daily frequency (last price of each day, empty days dropped) and kept
    only if at least ``MIN_POINTS`` daily values remain.

    Returns a dict mapping market id to the daily price Series.
    """
    series = {}
    for market_id, group in df.groupby('market_id'):
        ordered = group.sort_values('timestamp')

        if ordered['timestamp'].nunique() < 2:
            continue

        prices = ordered['price']
        flat_at_half = prices.nunique() == 1 and float(prices.iloc[0]) == 0.5
        if flat_at_half:
            continue

        daily = ordered.set_index('timestamp')['price'].resample('D').last().dropna()
        if len(daily) < MIN_POINTS:
            continue
        series[market_id] = daily
    return series


In [None]:
# Load the raw files, flatten the histories and build the daily series.
price_rows = load_prices()
markets_df = load_markets()
prices_df = expand_history(price_rows)
series_by_market = build_series(prices_df)

# Report how much data each stage produced.
print('Markets:', len(markets_df))
print('Price rows:', len(price_rows))
print('Series count:', len(series_by_market))


## Metrics and utilities


In [None]:
def train_test_split(series, train_frac):
    """Split a series chronologically into (train, test).

    The first ``int(len(series) * train_frac)`` observations form the
    training part; everything after that is the test part.
    """
    cut = int(len(series) * train_frac)
    head = series.iloc[:cut]
    tail = series.iloc[cut:]
    return head, tail

def rmse(y_true, y_pred):
    """Return the root-mean-squared error between two equal-length sequences.

    Inputs are coerced to float arrays, so plain lists work too (matching
    ``mape``) and values are compared positionally rather than by index label.
    """
    err = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return float(np.sqrt(np.mean(err ** 2)))

def mae(y_true, y_pred):
    """Return the mean absolute error between two equal-length sequences.

    Inputs are coerced to float arrays, so plain lists work too (matching
    ``mape``) and values are compared positionally rather than by index label.
    """
    err = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return float(np.mean(np.abs(err)))

def mape(y_true, y_pred):
    """Return the mean absolute percentage error as a fraction (not x100).

    Positions where ``y_true`` is zero are excluded because the ratio is
    undefined there; NaN ratios are ignored. When no valid position remains
    (e.g. every true value is zero) NaN is returned directly, without the
    "Mean of empty slice" RuntimeWarning that ``np.nanmean`` would emit.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Replace zero denominators with NaN so they drop out of the average.
    denom = np.where(y_true == 0, np.nan, y_true)
    ratios = np.abs((y_true - y_pred) / denom)
    valid = ratios[~np.isnan(ratios)]
    if valid.size == 0:
        return float('nan')
    return float(valid.mean())

def directional_accuracy(y_true, y_pred):
    """Return the share of steps where predicted and actual moves agree.

    The direction of each step is the sign of the first difference (-1, 0
    or +1). With fewer than two true values there is no step to compare
    and NaN is returned.
    """
    if len(y_true) >= 2:
        actual_moves = np.sign(np.diff(y_true))
        forecast_moves = np.sign(np.diff(y_pred))
        agreement = actual_moves == forecast_moves
        return float(np.mean(agreement))
    return np.nan

def eval_metrics(y_true, y_pred):
    """Compute every evaluation metric for one forecast.

    Returns a dict with the keys ``rmse``, ``mae``, ``mape`` and
    ``directional_acc`` (in that order), each produced by the metric
    function of the corresponding name.
    """
    scorers = (
        ('rmse', rmse),
        ('mae', mae),
        ('mape', mape),
        ('directional_acc', directional_accuracy),
    )
    return {label: score(y_true, y_pred) for label, score in scorers}


## Models


## Grid Search (lightweight)
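We run a small grid search over the moving-average window and the ARIMA order on a subset of markets to see which settings are reasonable. The results are informational only: the batch evaluation below uses a fixed window of 5 and a fixed ARIMA order of (1, 1, 1).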


In [None]:
# Lightweight grid search over the MA window and the ARIMA order.
GRID_MARKETS = 5  # number of markets (first ones in series_by_market) to search on
MA_WINDOWS = [3, 5, 7, 10]
ARIMA_ORDERS = [(1,1,1), (2,1,1), (1,1,2)]

# statsmodels is optional: import it once, outside the loops, and skip the
# ARIMA part of the search (with a notice) when it is unavailable.
try:
    from statsmodels.tsa.arima.model import ARIMA as _GridARIMA
except Exception as exc:
    _GridARIMA = None
    print('ARIMA grid search skipped:', exc)

grid_series = list(series_by_market.items())[:GRID_MARKETS]

grid_rows = []
for mid, series in grid_series:
    train, test = train_test_split(series, TRAIN_FRAC)
    steps = len(test)
    if steps < 2:
        continue

    # MA window search. The flat moving-average forecast is computed inline
    # because the forecast_ma helper is only defined in a later cell, so
    # calling it here would raise NameError on a top-to-bottom run.
    for w in MA_WINDOWS:
        pred = np.repeat(train.tail(w).mean(), steps)
        metrics = eval_metrics(test.values, pred)
        grid_rows.append({'market_id': mid, 'model': 'ma', 'param': w, **metrics})

    # ARIMA order search
    if _GridARIMA is None:
        continue
    for order in ARIMA_ORDERS:
        try:
            model = _GridARIMA(train, order=order).fit()
            pred = model.forecast(steps=steps).values
            metrics = eval_metrics(test.values, pred)
        except Exception as exc:
            # Best effort: a failed fit skips this order but is reported.
            print(f'ARIMA{order} failed for market {mid}: {exc}')
            continue
        grid_rows.append({'market_id': mid, 'model': 'arima', 'param': str(order), **metrics})

grid_df = pd.DataFrame(grid_rows)
grid_df.head()


In [None]:
# Average each (model, param) combination over the searched markets,
# best (lowest) RMSE first.
if not grid_df.empty:
    grid_summary = grid_df.groupby(['model','param'])[['rmse','mae','mape','directional_acc']].mean()
    grid_summary = grid_summary.sort_values('rmse')
    # A bare expression nested inside an `if` is not auto-displayed by the
    # notebook, so print the table explicitly.
    print(grid_summary.head(10))
else:
    print('Grid search skipped or no results.')


In [None]:
def forecast_naive(train, steps):
    """Forecast ``steps`` values by repeating the last training observation."""
    last_value = train.iloc[-1]
    return np.repeat(last_value, steps)

def forecast_ma(train, steps, window=5):
    """Forecast ``steps`` values as the mean of the last ``window`` training points.

    The same average is repeated for every forecast step.
    """
    recent_mean = train.tail(window).mean()
    return np.repeat(recent_mean, steps)

def forecast_arima(train, steps):
    """Forecast ``steps`` values with an ARIMA(1, 1, 1) model.

    Returns an array of forecasts, or ``None`` when statsmodels cannot be
    imported or when fitting/forecasting fails. A failed fit emits a
    ``RuntimeWarning`` so it is visible instead of silently dropping the
    model from the results.
    """
    try:
        from statsmodels.tsa.arima.model import ARIMA
    except Exception:
        # statsmodels is an optional dependency; without it ARIMA is skipped.
        return None

    try:
        fitted = ARIMA(train, order=(1, 1, 1)).fit()
        # np.asarray handles both Series and ndarray forecast results.
        return np.asarray(fitted.forecast(steps=steps))
    except Exception as exc:
        import warnings
        warnings.warn(f'ARIMA forecast failed: {exc}', RuntimeWarning)
        return None

def forecast_prophet(train, steps):
    """Forecast ``steps`` daily values with Prophet.

    ``train`` must have a datetime index. A tz-aware index is converted to
    naive UTC and a tz-naive index is used as is, so both work. Returns the
    last ``steps`` ``yhat`` values, or ``None`` when neither ``prophet`` nor
    ``fbprophet`` can be imported or when fitting/prediction fails (a failed
    fit emits a ``RuntimeWarning``).
    """
    try:
        try:
            from prophet import Prophet
        except Exception:
            # Older installations ship the package under its previous name.
            from fbprophet import Prophet
    except Exception:
        # Prophet is an optional dependency; without it the model is skipped.
        return None

    try:
        index = pd.DatetimeIndex(train.index)
        if index.tz is not None:
            # tz_convert(None) raises on a tz-naive index, so only strip the
            # timezone when there is one.
            index = index.tz_convert(None)
        df = pd.DataFrame({'ds': index, 'y': train.values})
        m = Prophet(daily_seasonality=True)
        m.fit(df)
        future = m.make_future_dataframe(periods=steps, freq='D')
        forecast = m.predict(future)
        return forecast['yhat'].tail(steps).values
    except Exception as exc:
        import warnings
        warnings.warn(f'Prophet forecast failed: {exc}', RuntimeWarning)
        return None

def make_supervised(series, lookback):
    """Turn a series into sliding-window (X, y) arrays.

    Each row of X holds ``lookback`` consecutive values and the matching y
    entry is the value that immediately follows that window. Both arrays
    are empty when the series has no more than ``lookback`` values.
    """
    data = series.values
    starts = range(len(data) - lookback)
    windows = [data[s:s + lookback] for s in starts]
    targets = [data[s + lookback] for s in starts]
    return np.array(windows), np.array(targets)

def forecast_lstm(train, steps, lookback=LOOKBACK):
    """Forecast ``steps`` values with a small LSTM trained on ``train``.

    A single-layer LSTM (hidden size 32) with a linear head is trained on
    sliding windows of length ``lookback`` for ``EPOCHS`` epochs (Adam,
    lr=1e-3, MSE loss, mini-batches of ``BATCH_SIZE``). Forecasts are
    produced autoregressively: each prediction is appended to the history
    and fed back in as input for the next step.

    Returns an array of ``steps`` predictions, or ``None`` when torch cannot
    be imported, when fewer than 10 training windows are available, or when
    training fails (the latter emits a ``RuntimeWarning``).
    """
    try:
        import torch
        import torch.nn as nn
    except Exception:
        # torch is an optional dependency; without it the model is skipped.
        return None

    try:
        X, y = make_supervised(train, lookback)
        if len(X) < 10:
            return None

        # np.random.seed does not affect torch: seed it explicitly so weight
        # initialisation and batch shuffling are reproducible across runs.
        torch.manual_seed(SEED)

        # Add a trailing feature dimension of size 1.
        X = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)
        y = torch.tensor(y, dtype=torch.float32).unsqueeze(-1)

        class LSTMModel(nn.Module):
            def __init__(self, hidden=32):
                super().__init__()
                self.lstm = nn.LSTM(1, hidden, batch_first=True)
                self.fc = nn.Linear(hidden, 1)

            def forward(self, x):
                out, _ = self.lstm(x)
                # Predict from the hidden state of the last time step only.
                return self.fc(out[:, -1, :])

        model = LSTMModel()
        opt = torch.optim.Adam(model.parameters(), lr=1e-3)
        loss_fn = nn.MSELoss()

        for _ in range(EPOCHS):
            idx = torch.randperm(len(X))  # reshuffle the windows every epoch
            for i in range(0, len(X), BATCH_SIZE):
                batch = idx[i:i+BATCH_SIZE]
                xb, yb = X[batch], y[batch]
                opt.zero_grad()
                pred = model(xb)
                loss = loss_fn(pred, yb)
                loss.backward()
                opt.step()

        # autoregressive forecast
        history = train.values.tolist()
        preds = []
        model.eval()
        for _ in range(steps):
            x = torch.tensor(history[-lookback:], dtype=torch.float32).view(1, lookback, 1)
            with torch.no_grad():
                yhat = model(x).item()
            preds.append(yhat)
            history.append(yhat)
        return np.array(preds)
    except Exception as exc:
        import warnings
        warnings.warn(f'LSTM forecast failed: {exc}', RuntimeWarning)
        return None

def forecast_transformer(train, steps, lookback=LOOKBACK):
    """Forecast ``steps`` values with a small Transformer encoder.

    Each scalar in a window of length ``lookback`` is projected to
    ``d_model`` dimensions, a learned positional embedding is added, the
    sequence passes through a 2-layer encoder and the representation of the
    last position is mapped to the next value. Training uses Adam (lr=1e-3),
    MSE loss, ``EPOCHS`` epochs and mini-batches of ``BATCH_SIZE``.
    Forecasts are produced autoregressively.

    Returns an array of ``steps`` predictions, or ``None`` when torch cannot
    be imported, when fewer than 10 training windows are available, or when
    training fails (the latter emits a ``RuntimeWarning``).
    """
    try:
        import torch
        import torch.nn as nn
    except Exception:
        # torch is an optional dependency; without it the model is skipped.
        return None

    try:
        X, y = make_supervised(train, lookback)
        if len(X) < 10:
            return None

        # np.random.seed does not affect torch: seed it explicitly so weight
        # initialisation, dropout and batch shuffling are reproducible.
        torch.manual_seed(SEED)

        # Add a trailing feature dimension of size 1.
        X = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)
        y = torch.tensor(y, dtype=torch.float32).unsqueeze(-1)

        class TransformerModel(nn.Module):
            def __init__(self, seq_len, d_model=32, nhead=4, num_layers=2):
                super().__init__()
                self.input_proj = nn.Linear(1, d_model)
                # Self-attention has no notion of order on its own; a learned
                # positional embedding lets the model tell time steps apart.
                self.pos_embed = nn.Parameter(torch.randn(1, seq_len, d_model) * 0.02)
                enc_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
                self.encoder = nn.TransformerEncoder(enc_layer, num_layers=num_layers)
                self.fc = nn.Linear(d_model, 1)

            def forward(self, x):
                x = self.input_proj(x) + self.pos_embed
                x = self.encoder(x)
                # Predict from the representation of the last position only.
                return self.fc(x[:, -1, :])

        model = TransformerModel(seq_len=lookback)
        opt = torch.optim.Adam(model.parameters(), lr=1e-3)
        loss_fn = nn.MSELoss()

        for _ in range(EPOCHS):
            idx = torch.randperm(len(X))  # reshuffle the windows every epoch
            for i in range(0, len(X), BATCH_SIZE):
                batch = idx[i:i+BATCH_SIZE]
                xb, yb = X[batch], y[batch]
                opt.zero_grad()
                pred = model(xb)
                loss = loss_fn(pred, yb)
                loss.backward()
                opt.step()

        # autoregressive forecast
        history = train.values.tolist()
        preds = []
        model.eval()  # disables dropout inside the encoder layers
        for _ in range(steps):
            x = torch.tensor(history[-lookback:], dtype=torch.float32).view(1, lookback, 1)
            with torch.no_grad():
                yhat = model(x).item()
            preds.append(yhat)
            history.append(yhat)
        return np.array(preds)
    except Exception as exc:
        import warnings
        warnings.warn(f'Transformer forecast failed: {exc}', RuntimeWarning)
        return None


## Batch evaluation


In [None]:
# Evaluate every model on the longest series. The deep models are limited to
# the DEEP_MODEL_MAX_MARKETS longest series.
results = []
markets = list(series_by_market.items())
markets = sorted(markets, key=lambda x: len(x[1]), reverse=True)[:MAX_MARKETS_EVAL]
deep_markets = {m[0] for m in markets[:DEEP_MODEL_MAX_MARKETS]}

for mid, series in markets:
    train, test = train_test_split(series, TRAIN_FRAC)
    steps = len(test)
    if steps < 2:
        continue

    # Baselines always produce a forecast.
    preds = {}
    preds['naive'] = forecast_naive(train, steps)
    preds['ma5'] = forecast_ma(train, steps, window=5)

    # Optional models return None when unavailable or when fitting fails.
    arima_pred = forecast_arima(train, steps)
    if arima_pred is not None:
        preds['arima'] = arima_pred

    prophet_pred = forecast_prophet(train, steps)
    if prophet_pred is not None:
        preds['prophet'] = prophet_pred

    if mid in deep_markets:
        lstm_pred = forecast_lstm(train, steps)
        if lstm_pred is not None:
            preds['lstm'] = lstm_pred
        transformer_pred = forecast_transformer(train, steps)
        if transformer_pred is not None:
            preds['transformer'] = transformer_pred

    for model, yhat in preds.items():
        metrics = eval_metrics(test.values, yhat)
        results.append({
            'market_id': mid,
            'model': model,
            'n_train': len(train),
            'n_test': len(test),
            **metrics,
        })

# Explicit columns keep the frame well-formed even when no market was
# evaluated, so the later groupby('model') cells do not raise KeyError.
results_df = pd.DataFrame(
    results,
    columns=['market_id', 'model', 'n_train', 'n_test',
             'rmse', 'mae', 'mape', 'directional_acc'],
)
results_df.head()


In [None]:
# Summary stats by model
# Mean of every metric per model, ordered from lowest to highest RMSE.
summary = (
    results_df
    .groupby('model')[['rmse', 'mae', 'mape', 'directional_acc']]
    .mean()
    .sort_values(by='rmse')
)
summary


In [None]:
# Rank models per market and compute average rank
# Rank the models within each market for every metric (1 = best), then
# average those ranks per model. Errors rank ascending; directional accuracy
# ranks descending because higher is better.
ranked = results_df.copy()
for _rank_col, _metric, _ascending in (
    ('rank_rmse', 'rmse', True),
    ('rank_mae', 'mae', True),
    ('rank_mape', 'mape', True),
    ('rank_dir', 'directional_acc', False),
):
    ranked[_rank_col] = ranked.groupby('market_id')[_metric].rank(
        ascending=_ascending, method='average'
    )
rank_summary = ranked.groupby('model')[['rank_rmse','rank_mae','rank_mape','rank_dir']].mean()
rank_summary


## Plots


In [None]:
# Grouped bar chart of the per-model mean metrics.
summary.plot(kind='bar', figsize=(10, 4), title='Mean Metrics by Model')
plt.tight_layout()


In [None]:
# RMSE distribution
# Overlaid RMSE histograms, one semi-transparent layer per model.
fig, ax = plt.subplots(figsize=(8, 4))
for model in results_df['model'].unique():
    vals = results_df.loc[results_df['model'] == model, 'rmse'].dropna()
    ax.hist(vals, bins=20, alpha=0.4, label=model)
ax.set_title('RMSE Distribution by Model')
ax.legend()
plt.tight_layout()


In [None]:
# Save outputs
# Persist the per-market results and both per-model summaries as CSV.
# The summaries keep their index because it holds the model name.
results_df.to_csv(OUT_DIR.joinpath('model_results.csv'), index=False)
summary.to_csv(OUT_DIR.joinpath('model_summary.csv'))
rank_summary.to_csv(OUT_DIR.joinpath('model_rank_summary.csv'))
print('Saved results to', OUT_DIR)
