<a href="https://colab.research.google.com/github/PrashanthBhaskara/KalshiCorrelationForecast/blob/main/AI_Brier_Score.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Daily price-history exports, one CSV per KXLLM1 event; the date code in the
# middle of each name is the only part that varies.
files = [
    f"kalshi-price-history-kxllm1-{date_code}-day.csv"
    for date_code in ("25dec31", "26jan17", "26jan24", "26jan31", "26feb07", "26feb14")
]

# Columns selected from each CSV for the change-correlation analysis.
MODELS = "Claude Gemini ChatGPT Grok".split()

def corr_from_diff(fp, models=None):
    """Correlation matrix of day-over-day price changes for one price-history CSV.

    Parameters
    ----------
    fp : str or path-like
        CSV file with a ``timestamp`` column plus one column per model.
    models : sequence of str, optional
        Columns to correlate. Defaults to the module-level ``MODELS`` list.

    Returns
    -------
    pandas.DataFrame
        Pairwise correlation of the row-to-row differences. When fewer than
        three complete difference rows remain, an all-NaN matrix indexed by
        the requested models is returned instead.
    """
    cols = list(MODELS) if models is None else list(models)

    df = pd.read_csv(fp)
    df["timestamp"] = pd.to_datetime(df["timestamp"], utc=True, errors="coerce")

    # Unparseable timestamps are coerced to NaT. sort_values would push those
    # rows to the end, where they would contribute a meaningless "last change"
    # to the diff series, so drop them before ordering.
    df = df.dropna(subset=["timestamp"]).sort_values("timestamp").set_index("timestamp")

    df = df[cols].apply(pd.to_numeric, errors="coerce")

    # Row-to-row change; keep only rows where every model has a value so all
    # pairwise correlations are computed over the same observations.
    chg = df.diff().dropna(how="any")

    # if not enough rows, correlation is unstable; return NaN matrix
    if len(chg) < 3:
        return pd.DataFrame(np.nan, index=cols, columns=cols)

    return chg.corr()

# One change-correlation matrix per event file.
corr_mats = {}
for fp in files:
    corr_mats[fp] = corr_from_diff(fp)

# Extract the Gemini-vs-ChatGPT entry from each matrix, keyed by file name.
gemini_vs_chatgpt = {fp: mat.loc["Gemini", "ChatGPT"] for fp, mat in corr_mats.items()}

# sort_index orders by file name (string order), not by event date.
summary = pd.Series(gemini_vs_chatgpt, name="Corr(ΔGemini, ΔChatGPT)").sort_index()

summary

Unnamed: 0,"Corr(ΔGemini, ΔChatGPT)"
kalshi-price-history-kxllm1-25dec31-day.csv,-0.299115
kalshi-price-history-kxllm1-26feb07-day.csv,-0.414117
kalshi-price-history-kxllm1-26feb14-day.csv,0.193527
kalshi-price-history-kxllm1-26jan17-day.csv,-0.779866
kalshi-price-history-kxllm1-26jan24-day.csv,-0.296122
kalshi-price-history-kxllm1-26jan31-day.csv,-0.114715


In [2]:
import pandas as pd
import numpy as np
import glob
import os
import re
import requests

# If you're using Colab "Files" panel, your uploaded files are usually in /content
# If you're running locally, adjust the folder path.
DATA_DIR = "/content"

files = sorted(glob.glob(os.path.join(DATA_DIR, "kalshi-price-history-kxllm1-*-day.csv")))
print("Found files:", len(files))
for f in files:
    print(" -", os.path.basename(f))

# pd.concat fails with an opaque "No objects to concatenate" when the glob
# matched nothing; stop here with a message that points at the actual cause.
if not files:
    raise FileNotFoundError(
        f"No 'kalshi-price-history-kxllm1-*-day.csv' files found in {DATA_DIR!r}; "
        "upload the CSVs or point DATA_DIR at the folder that contains them."
    )

# Load every event file and tag each row with the file it came from so the
# combined frame can still be split per event.
dfs = []
for f in files:
    df = pd.read_csv(f)
    df["source_file"] = os.path.basename(f)
    dfs.append(df)

raw = pd.concat(dfs, ignore_index=True)
raw.head()

Found files: 7
 - kalshi-price-history-kxllm1-25dec31-day.csv
 - kalshi-price-history-kxllm1-26feb07-day.csv
 - kalshi-price-history-kxllm1-26feb14-day.csv
 - kalshi-price-history-kxllm1-26feb21-day.csv
 - kalshi-price-history-kxllm1-26jan17-day.csv
 - kalshi-price-history-kxllm1-26jan24-day.csv
 - kalshi-price-history-kxllm1-26jan31-day.csv


Unnamed: 0,timestamp,Gemini,Claude,Qwen,DeepSeek,LLaMA,ChatGPT,Grok,source_file,Ernie,Dola
0,2024-11-06T00:00:00Z,,2.0,,,,99.0,2.0,kalshi-price-history-kxllm1-25dec31-day.csv,,
1,2024-11-07T00:00:00Z,11.93,4.08,,,,85.78,60.84,kalshi-price-history-kxllm1-25dec31-day.csv,,
2,2024-11-08T00:00:00Z,8.0,12.82,,,,65.45,22.55,kalshi-price-history-kxllm1-25dec31-day.csv,,
3,2024-11-09T00:00:00Z,13.44,12.83,,,,66.29,13.4,kalshi-price-history-kxllm1-25dec31-day.csv,,
4,2024-11-10T00:00:00Z,15.99,12.0,,,,66.81,12.0,kalshi-price-history-kxllm1-25dec31-day.csv,,


In [3]:
def file_to_event_ticker(filename: str) -> str:
    """Derive the event ticker from a price-history file name.

    The name is lower-cased and searched for ``kxllm1-`` followed by a date
    code of two digits, three letters and two digits; e.g.
    ``kalshi-price-history-kxllm1-25dec31-day.csv`` gives ``KXLLM1-25DEC31``.

    Raises:
        ValueError: if no such date code is found in ``filename``.
    """
    found = re.search(r"kxllm1-([0-9]{2}[a-z]{3}[0-9]{2})", filename.lower())
    if found is None:
        raise ValueError(f"Could not parse event date from filename: {filename}")
    return "KXLLM1-" + found.group(1).upper()

# Sanity check: print the ticker derived from each discovered file name.
for path in files:
    base_name = os.path.basename(path)
    print(base_name, "->", file_to_event_ticker(base_name))

kalshi-price-history-kxllm1-25dec31-day.csv -> KXLLM1-25DEC31
kalshi-price-history-kxllm1-26feb07-day.csv -> KXLLM1-26FEB07
kalshi-price-history-kxllm1-26feb14-day.csv -> KXLLM1-26FEB14
kalshi-price-history-kxllm1-26feb21-day.csv -> KXLLM1-26FEB21
kalshi-price-history-kxllm1-26jan17-day.csv -> KXLLM1-26JAN17
kalshi-price-history-kxllm1-26jan24-day.csv -> KXLLM1-26JAN24
kalshi-price-history-kxllm1-26jan31-day.csv -> KXLLM1-26JAN31


In [4]:
BASE = "https://api.elections.kalshi.com/trade-api/v2"

def get_event_markets(event_ticker: str, status="all", limit=1000):
    """Fetch every market belonging to one event (each market = one model contract).

    Parameters
    ----------
    event_ticker : str
        Event ticker sent as the ``event_ticker`` query parameter.
    status : str or None
        Status filter forwarded to the API. ``"all"`` (the default) and
        ``None`` mean "no filter", so the parameter is omitted from the request.
    limit : int
        Page size requested from the API.

    Returns
    -------
    list[dict]
        Market objects from all pages, in the order the API returned them.

    Raises
    ------
    requests.HTTPError
        If any page request returns an error status.
    """
    url = f"{BASE}/markets"
    params = {"event_ticker": event_ticker, "limit": limit}

    # NOTE(review): "all" does not appear to be one of the status values the
    # API documents (unopened/open/closed/settled) - confirm. Omitting the
    # filter is the way to ask for markets in every state.
    if status and status != "all":
        params["status"] = status

    markets = []
    while True:
        r = requests.get(url, params=params, timeout=30)
        r.raise_for_status()
        payload = r.json()

        page = payload.get("markets") or []
        markets.extend(page)

        # Follow the pagination cursor until the API stops returning one; an
        # empty page also ends the loop so a repeated cursor cannot spin forever.
        cursor = payload.get("cursor")
        if not cursor or not page:
            break
        params["cursor"] = cursor

    return markets

def normalize(s: str) -> str:
    """Lower-case ``s`` and remove every character outside ``a-z`` / ``0-9``.

    A falsy input (``None`` or ``""``) yields the empty string.
    """
    lowered = s.lower() if s else ""
    return re.sub(r"[^a-z0-9]+", "", lowered)

# CSV column name -> substrings that identify that model in a market's
# descriptive text (title / subtitle / ticker). Insertion order is the order
# in which the aliases are tried.
MODEL_ALIASES = {
    column: list(aliases)
    for column, aliases in (
        ("Gemini", ("gemini", "google")),
        ("Claude", ("claude", "anthropic")),
        ("ChatGPT", ("chatgpt", "openai", "gpt")),
        ("Grok", ("grok", "xai")),
        ("LLaMA", ("llama", "meta")),
        ("Qwen", ("qwen", "alibaba")),
        ("DeepSeek", ("deepseek",)),
    )
}

def market_to_model_name(mkt: dict, model_columns: list[str]) -> str | None:
    """Map a market object to the CSV column of the model it refers to.

    The market's title, subtitle, yes/no sub-titles and ticker are joined,
    normalized with ``normalize`` and searched for known names.

    Parameters
    ----------
    mkt : dict
        Market object; only the text fields listed above are read.
    model_columns : list[str]
        Candidate CSV column names. Aliases are only considered for models
        that appear in this list.

    Returns
    -------
    str or None
        The first matching column name, or ``None`` when nothing matches.
        Aliases in ``MODEL_ALIASES`` are tried first (in dict order), then the
        column names themselves.
    """
    field_names = ("title", "subtitle", "yes_sub_title", "no_sub_title", "ticker")

    # `mkt.get(k, "")` only falls back when the key is absent; a key that is
    # present with a null value would put None into the join and raise
    # TypeError. Coerce missing/None values to "" explicitly.
    text_fields = [str(mkt.get(k) or "") for k in field_names]
    blob_norm = normalize(" ".join(text_fields))

    # Try alias matching first
    for model, aliases in MODEL_ALIASES.items():
        if model not in model_columns:
            continue
        if any(normalize(a) in blob_norm for a in aliases):
            return model

    # Fallback: direct contains model name
    for model in model_columns:
        if normalize(model) in blob_norm:
            return model

    return None

def get_winner_model_for_event(event_ticker: str, model_columns: list[str]) -> str:
    """Return the CSV column name of the model whose market resolved YES.

    Markets are fetched with ``status="all"`` and filtered to those whose
    ``result`` is ``"yes"``. If several qualify, the one with the smallest
    ticker is used. Note that a later cell in this notebook defines a function
    with the same name, which replaces this one once that cell is run.

    Raises:
        ValueError: when no market has resolved YES, or when the winning
            market cannot be mapped to one of ``model_columns``.
    """
    yes_settled = [
        mkt
        for mkt in get_event_markets(event_ticker, status="all")
        if mkt.get("result") == "yes"
    ]
    if not yes_settled:
        raise ValueError(f"No YES-settled market found for event {event_ticker}. Is it not resolved yet?")

    # More than one YES is unexpected; order by ticker so the pick is stable.
    if len(yes_settled) > 1:
        yes_settled.sort(key=lambda x: x.get("ticker",""))

    winner_market = yes_settled[0]
    winner_model = market_to_model_name(winner_market, model_columns)
    if winner_model is not None:
        return winner_model

    raise ValueError(
        f"Could not map winning market to a CSV column.\n"
        f"Event: {event_ticker}\nWinning market ticker: {winner_market.get('ticker')}\nTitle: {winner_market.get('title')}"
    )

In [5]:
def last_non_null(series: pd.Series):
    """Return the final non-missing value of ``series``, or NaN if there is none."""
    present = series[series.notna()]
    if present.empty:
        return np.nan
    return present.iloc[-1]

# Every column of the combined frame other than the bookkeeping ones
# (timestamp + source_file) is treated as a model column.
ignore_cols = {"timestamp", "source_file"}
model_cols = [name for name in raw.columns if name not in ignore_cols]

print("Model columns:", model_cols)

# One row per event file: the last non-null value of each model column,
# or NaN when that file has no such column.
final_rows = []
for path in files:
    fn = os.path.basename(path)
    event_df = pd.read_csv(path)

    row = {"event_ticker": file_to_event_ticker(fn), "source_file": fn}
    row.update(
        {
            col: last_non_null(event_df[col]) if col in event_df.columns else np.nan
            for col in model_cols
        }
    )
    final_rows.append(row)

final_probs = pd.DataFrame(final_rows)

# Convert cents -> probability in [0,1]
for col in model_cols:
    final_probs[col] = final_probs[col].div(100.0)

final_probs

Model columns: ['Gemini', 'Claude', 'Qwen', 'DeepSeek', 'LLaMA', 'ChatGPT', 'Grok', 'Ernie', 'Dola']


Unnamed: 0,event_ticker,source_file,Gemini,Claude,Qwen,DeepSeek,LLaMA,ChatGPT,Grok,Ernie,Dola
0,KXLLM1-25DEC31,kalshi-price-history-kxllm1-25dec31-day.csv,0.9782,0.01,0.01,0.01,0.01,0.019,0.0152,,
1,KXLLM1-26FEB07,kalshi-price-history-kxllm1-26feb07-day.csv,0.0766,0.7517,0.01,,0.01,0.0112,0.0103,0.01,
2,KXLLM1-26FEB14,kalshi-price-history-kxllm1-26feb14-day.csv,0.0314,0.9802,0.01,,0.01,0.0118,0.01,0.01,
3,KXLLM1-26FEB21,kalshi-price-history-kxllm1-26feb21-day.csv,0.0147,0.9782,0.01,,0.01,0.0101,0.01,0.01,0.01
4,KXLLM1-26JAN17,kalshi-price-history-kxllm1-26jan17-day.csv,0.9833,0.01,0.01,,0.01,0.0119,0.0115,0.01,
5,KXLLM1-26JAN24,kalshi-price-history-kxllm1-26jan24-day.csv,0.854,0.01,0.01,,0.01,0.0109,0.0147,0.01,
6,KXLLM1-26JAN31,kalshi-price-history-kxllm1-26jan31-day.csv,0.9799,0.01,0.01,,0.01,0.0118,0.0161,0.01,


In [9]:
def get_winner_model_for_event(event_ticker: str, model_columns: list[str]) -> str:
    """Return the CSV column name of the model whose market resolved YES.

    Parameters
    ----------
    event_ticker : str
        Event whose markets are fetched via ``get_event_markets``.
    model_columns : list[str]
        Candidate CSV column names the winning market may map to.

    Returns
    -------
    str
        Column name of the winning model.

    Raises
    ------
    ValueError
        If no market has ``result == "yes"`` (the message lists the
        (status, result) pairs that were seen), or if the winning market
        cannot be mapped to one of ``model_columns``.
    """
    # No explicit status is passed here, so get_event_markets applies its own
    # default for that parameter.
    mkts = get_event_markets(event_ticker)

    # Find settled YES market(s)
    yes_settled = [m for m in mkts if (m.get("result") == "yes")]

    if len(yes_settled) == 0:
        # helpful debug print
        statuses = sorted(set((m.get("status"), m.get("result")) for m in mkts))
        raise ValueError(
            f"No YES-settled market found for event {event_ticker}. "
            f"Unique (status,result) pairs seen: {statuses[:10]}"
        )

    # Exactly one YES market is expected. If several show up, take the lowest
    # ticker so the choice does not depend on the API's response order.
    winner_market = min(yes_settled, key=lambda m: m.get("ticker") or "")

    winner_model = market_to_model_name(winner_market, model_columns)
    if winner_model is None:
        raise ValueError(
            "Could not map winning market to a CSV column.\n"
            f"Event: {event_ticker}\n"
            f"Winning market ticker: {winner_market.get('ticker')}\n"
            f"Title: {winner_market.get('title')}\n"
            f"Subtitle: {winner_market.get('subtitle')}"
        )

    return winner_model

In [10]:
# Spot-check the winner lookup on a single event before looping over all of them.
event = "KXLLM1-25DEC31"
spot_check_winner = get_winner_model_for_event(event, model_cols)
print("Winner for", event, "=", spot_check_winner)

Winner for KXLLM1-25DEC31 = Gemini


In [12]:
# Look up the winning model for every event; events whose lookup fails are
# kept in `final_probs` but flagged so they can be excluded from scoring.
winners = []
valid_rows = []

for event in final_probs["event_ticker"]:
    try:
        winner = get_winner_model_for_event(event, model_cols)
    except ValueError as e:
        # get_winner_model_for_event raises ValueError both when no market has
        # resolved YES and when the winning market cannot be mapped to a CSV
        # column. Print the actual reason so a mapping failure is not
        # misreported as "not settled".
        print(f"Skipping {event}: {e}")
        winners.append(None)
        valid_rows.append(False)
    else:
        winners.append(winner)
        valid_rows.append(True)

final_probs["winner_model"] = winners
final_probs["is_settled"] = valid_rows

# Keep only events with a resolved winner
final_probs_settled = final_probs[final_probs["is_settled"]].copy()

final_probs_settled[["event_ticker", "winner_model"]]

Skipping KXLLM1-26FEB21 (not settled)


Unnamed: 0,event_ticker,winner_model
0,KXLLM1-25DEC31,Gemini
1,KXLLM1-26FEB07,Claude
2,KXLLM1-26FEB14,Claude
4,KXLLM1-26JAN17,Gemini
5,KXLLM1-26JAN24,Gemini
6,KXLLM1-26JAN31,Gemini


In [13]:
def multiclass_brier(row, model_cols):
    """Sum of squared errors between quoted probabilities and the 0/1 outcome.

    ``row`` must provide ``"winner_model"`` plus one entry per name in
    ``model_cols``. The outcome is 1.0 for the winner and 0.0 otherwise;
    entries whose probability is missing are left out of the sum.
    """
    winner = row["winner_model"]
    total = 0.0
    for name in model_cols:
        prob = row[name]
        if not pd.isna(prob):
            target = 1.0 if name == winner else 0.0
            total += (prob - target)**2
    return total

# Score each settled event row-wise; model_cols is forwarded to
# multiclass_brier as its second positional argument.
final_probs_settled["brier_multiclass"] = final_probs_settled.apply(
    multiclass_brier, axis=1, args=(model_cols,)
)

mean_brier = final_probs_settled["brier_multiclass"].mean()
print("Mean multi-class Brier (settled only):", mean_brier)

final_probs_settled[["event_ticker", "winner_model", "brier_multiclass"]]

Mean multi-class Brier (settled only): 0.015940434999999996


Unnamed: 0,event_ticker,winner_model,brier_multiclass
0,KXLLM1-25DEC31,Gemini,0.001467
1,KXLLM1-26FEB07,Claude,0.068052
2,KXLLM1-26FEB14,Claude,0.001917
4,KXLLM1-26JAN17,Gemini,0.000953
5,KXLLM1-26JAN24,Gemini,0.022051
6,KXLLM1-26JAN31,Gemini,0.001202


In [14]:
# Binary Brier score per model: mean over settled events of
# (quoted probability - 1{model won})^2.
won_by = final_probs_settled["winner_model"]
rows = [
    {
        "model": col,
        "brier_binary_mean": (
            (final_probs_settled[col].astype(float) - (won_by == col).astype(float))**2
        ).mean(),
    }
    for col in model_cols
]

per_model_brier = pd.DataFrame(rows).sort_values("brier_binary_mean")
per_model_brier

Unnamed: 0,model,brier_binary_mean
3,DeepSeek,0.0001
2,Qwen,0.0001
7,Ernie,0.0001
4,LLaMA,0.0001
5,ChatGPT,0.000171
6,Grok,0.000174
0,Gemini,0.004888
1,Claude,0.010407
8,Dola,


In [15]:
# The probabilities being scored: the LAST snapshot per event, next to its winner.
cols = ["event_ticker", "winner_model"] + model_cols
scored_view = final_probs_settled[cols]
display(scored_view.sort_values("event_ticker"))

# Per-model min/max/mean/std, to spot columns that are constant or tiny.
model_stats = final_probs_settled[model_cols].agg(["min","max","mean","std"])
summary = model_stats.T.sort_values("mean")
display(summary)

# Row-wise total of the quoted probabilities (multi-outcome markets are
# usually roughly summing near 1); missing values are skipped.
prob_totals = final_probs_settled[model_cols].sum(axis=1, skipna=True)
final_probs_settled["prob_sum"] = prob_totals
display(final_probs_settled[["event_ticker","prob_sum"]].sort_values("event_ticker"))

Unnamed: 0,event_ticker,winner_model,Gemini,Claude,Qwen,DeepSeek,LLaMA,ChatGPT,Grok,Ernie,Dola
0,KXLLM1-25DEC31,Gemini,0.9782,0.01,0.01,0.01,0.01,0.019,0.0152,,
1,KXLLM1-26FEB07,Claude,0.0766,0.7517,0.01,,0.01,0.0112,0.0103,0.01,
2,KXLLM1-26FEB14,Claude,0.0314,0.9802,0.01,,0.01,0.0118,0.01,0.01,
4,KXLLM1-26JAN17,Gemini,0.9833,0.01,0.01,,0.01,0.0119,0.0115,0.01,
5,KXLLM1-26JAN24,Gemini,0.854,0.01,0.01,,0.01,0.0109,0.0147,0.01,
6,KXLLM1-26JAN31,Gemini,0.9799,0.01,0.01,,0.01,0.0118,0.0161,0.01,


Unnamed: 0,min,max,mean,std
DeepSeek,0.01,0.01,0.01,
Qwen,0.01,0.01,0.01,0.0
Ernie,0.01,0.01,0.01,0.0
LLaMA,0.01,0.01,0.01,0.0
ChatGPT,0.0109,0.019,0.012767,0.003079
Grok,0.01,0.0161,0.012967,0.002679
Claude,0.01,0.9802,0.295317,0.447878
Gemini,0.0314,0.9833,0.650567,0.46491
Dola,,,,


Unnamed: 0,event_ticker,prob_sum
0,KXLLM1-25DEC31,1.0524
1,KXLLM1-26FEB07,0.8798
2,KXLLM1-26FEB14,1.0634
4,KXLLM1-26JAN17,1.0467
5,KXLLM1-26JAN24,0.9196
6,KXLLM1-26JAN31,1.0478


In [18]:
# Recompute each event's total quoted probability (missing values skipped)
# and show it ordered by event ticker.
prob_totals = final_probs_settled[model_cols].sum(axis=1, skipna=True)
final_probs_settled["prob_sum"] = prob_totals
final_probs_settled[["event_ticker","prob_sum"]].sort_values("event_ticker")

Unnamed: 0,event_ticker,prob_sum
0,KXLLM1-25DEC31,1.0524
1,KXLLM1-26FEB07,0.8798
2,KXLLM1-26FEB14,1.0634
4,KXLLM1-26JAN17,1.0467
5,KXLLM1-26JAN24,0.9196
6,KXLLM1-26JAN31,1.0478


In [19]:
# Score at earlier time
from datetime import timedelta
import pandas as pd
import numpy as np

def _to_utc_timestamp(value):
    """Convert a raw close-time value to a UTC ``pd.Timestamp``; ``None`` if it cannot be parsed."""
    if isinstance(value, str):
        text = value.strip()
        try:
            # A purely numeric string is treated as epoch seconds below.
            value = float(text)
        except ValueError:
            # Anything else is parsed as a date/time string.
            ts = pd.to_datetime(text, utc=True, errors="coerce")
            return None if pd.isna(ts) else ts
    try:
        ts = pd.to_datetime(value, unit="s", utc=True)
    except (ValueError, TypeError, OverflowError):
        return None
    return None if pd.isna(ts) else ts


def get_event_close_ts(event_ticker: str):
    """Return the close time of an event as a UTC timestamp, or ``None``.

    The close time is read from the first market returned by
    ``get_event_markets``, trying several candidate field names in order.

    Parameters
    ----------
    event_ticker : str
        Event whose markets are queried.

    Returns
    -------
    pandas.Timestamp or None
        Timezone-aware (UTC) timestamp. ``None`` when the event has no
        markets, none of the candidate fields is populated, or no populated
        field can be parsed.
    """
    mkts = get_event_markets(event_ticker)
    if not mkts:
        # Nothing to read a close time from.
        return None

    # pick first market
    m = mkts[0]

    # Fields vary; try common names
    # NOTE(review): the previous version passed unit="s" with
    # errors="ignore", which hands a date string back unparsed instead of a
    # Timestamp; strings and epoch numbers are now handled separately.
    for k in ["close_time", "close_ts", "close_date", "close_datetime"]:
        if k in m and m[k]:
            ts = _to_utc_timestamp(m[k])
            if ts is not None:
                return ts

    return None

# Name of the time column in `raw`; change it if your export uses another
# name (e.g. 'ts').
TIME_COL = "timestamp"

# Parse the column to datetimes and store the result back on `raw`.
parsed_times = pd.to_datetime(raw[TIME_COL])
raw[TIME_COL] = parsed_times

In [20]:
# Keep only the model columns that have at least one non-missing value among
# the settled events. This rebinds `model_cols`, so cells run after this one
# see the reduced list.
kept_cols = []
for col in model_cols:
    if final_probs_settled[col].notna().any():
        kept_cols.append(col)
model_cols = kept_cols

In [22]:
import os

# Write both score tables to the outputs/ folder (created if missing),
# without the index column.
os.makedirs("outputs", exist_ok=True)
for frame, out_path in (
    (final_probs_settled, "outputs/brier_by_event.csv"),
    (per_model_brier, "outputs/brier_by_model.csv"),
):
    frame.to_csv(out_path, index=False)