# Polymarket Arbitrage Analysis

This notebook implements a quantitative analysis framework to detect statistical arbitrage between Polymarket prediction contracts and Spot markets.

In [None]:
# 1. Tech Stack & Imports
import ccxt
import pandas as pd
import requests
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import re
from datetime import datetime, timezone

# Suppress warnings for cleaner output
import warnings
warnings.filterwarnings('ignore')

In [None]:
# 2. Configuration & Constants

# Events endpoint of the Polymarket Gamma API.
POLYMARKET_GAMMA_API = "https://gamma-api.polymarket.com/events"

# Trading cost assumptions, expressed as fractions of traded notional.
PROP_FIRM_FEE_PCT = 0.0005       # 0.05% fee charged on each side of a trade
ESTIMATED_SLIPPAGE_PCT = 0.0005  # 0.05% slippage allowance

# Round-trip cost: the fee is paid on entry and on exit, slippage is counted once.
TOTAL_COST_PCT = ESTIMATED_SLIPPAGE_PCT + 2 * PROP_FIRM_FEE_PCT

In [None]:
# 3. Data Fetching Functions

def fetch_spot_price(symbol='BTC/USDT', exchange_id='binance'):
    """
    Return the last traded price of ``symbol`` via CCXT, or None on failure.

    The requested exchange is queried first. When that exchange is
    'binance' and the query fails, 'coinbase' is tried once as a fallback.
    Every failure is reported with ``print``; no exception is propagated.
    """
    # Exchanges to try, in order. Only a Binance request has a fallback.
    venues = [exchange_id]
    if exchange_id == 'binance':
        venues.append('coinbase')

    for venue in venues:
        try:
            client = getattr(ccxt, venue)()
            return client.fetch_ticker(symbol)['last']
        except Exception as err:
            print(f"Error fetching spot price: {err}")

    return None

def fetch_polymarket_events(slug_query="bitcoin"):
    """
    Fetch open events from the Polymarket Gamma API for a search query.

    Args:
        slug_query: Search text sent as the ``q`` query parameter.

    Returns:
        The decoded JSON payload when it is a list (callers iterate it as a
        list of event dicts), otherwise an empty list. An empty list is also
        returned when the request fails, times out, returns an HTTP error
        status, or the body is not valid JSON. Failures are reported with
        ``print``; nothing is raised.
    """
    params = {
        "limit": 50,
        "active": "true",
        "closed": "false",
        "q": slug_query,  # Search query
    }
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook indefinitely.
        response = requests.get(POLYMARKET_GAMMA_API, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP errors; ValueError
        # covers a body that cannot be decoded as JSON.
        print(f"Error fetching Polymarket data: {e}")
        return []

    # Callers loop over the result and call .get() on each item, so anything
    # other than a list (e.g. an error object) is treated as "no events".
    if not isinstance(payload, list):
        print(
            "Error fetching Polymarket data: "
            f"unexpected payload type {type(payload).__name__}"
        )
        return []

    return payload

In [None]:
# 4. Parsing Logic (Regex Extraction)

# Dollar amount: comma-grouped thousands ("95,000", "100,000.00") or a plain
# number ("95000", "1.5"), optionally followed by a k/m magnitude suffix.
# The grouped alternative requires at least one ",ddd" group so it cannot
# claim only the leading digits of an ungrouped number ("95000", "100k").
# The suffix must end at a word boundary so "$100 more" is not read as millions.
_STRIKE_RE = re.compile(
    r'\$(\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?)(?:([km])\b)?',
    re.IGNORECASE,
)

# Month (abbreviated or full) + day, with an optional 4-digit year.
# The (?!\d) guards stop "Jan 2025" from being read as "Jan 20".
_EXPIRY_RE = re.compile(
    r'\b(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|'
    r'Jul(?:y)?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|'
    r'Dec(?:ember)?)\.?\s+(\d{1,2})(?!\d)(?:st|nd|rd|th)?'
    r'(?:,?\s+(\d{4})(?!\d))?',
    re.IGNORECASE,
)

_MONTH_NUMBERS = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}

_SUFFIX_MULTIPLIERS = {'k': 1_000, 'm': 1_000_000}


def parse_strike_and_expiry(title):
    """
    Extract the strike price (K) and expiry date from a contract title.

    Example: "Bitcoin hits $100k before Jan 3" -> (100000.0, <Jan 3 of the
    current year>).

    Args:
        title: Contract title text. Non-string or empty input yields
            ``(None, None)``.

    Returns:
        A ``(strike, expiry)`` tuple. ``strike`` is a float in dollars, or
        None when no ``$`` amount is found. ``expiry`` is a naive
        ``datetime`` at midnight, or None when no date is found or the date
        is not a valid calendar day. When the title gives no year, the
        current year is assumed (so a "Jan 3" title read in December resolves
        to the January that has already passed).
    """
    if not isinstance(title, str) or not title:
        return None, None

    strike = None
    expiry = None

    strike_match = _STRIKE_RE.search(title)
    if strike_match:
        amount, suffix = strike_match.groups()
        multiplier = _SUFFIX_MULTIPLIERS.get((suffix or '').lower(), 1)
        strike = float(amount.replace(',', '')) * multiplier

    date_match = _EXPIRY_RE.search(title)
    if date_match:
        month_name, day, year = date_match.groups()
        # Every accepted month spelling starts with its 3-letter abbreviation.
        month = _MONTH_NUMBERS[month_name[:3].lower()]
        # Use the year written in the title when present; only fall back to
        # the current year when the title omits it.
        year_number = int(year) if year else datetime.now().year
        try:
            expiry = datetime(year_number, month, int(day))
        except ValueError:
            # Not a real calendar date (e.g. "Feb 30"); report no expiry.
            expiry = None

    return strike, expiry


In [None]:
# 5. Data Processing

# Column order of the analysis frame; also used so an empty result still
# exposes the expected columns.
_ANALYSIS_COLUMNS = [
    "contract_title",
    "probability",
    "strike",
    "expiry",
    "spot_price",
    "implied_value",
    "arb_deviation",
]


def _extract_first_outcome_price(market):
    """Return the first entry of a market's ``outcomePrices`` as a float, or None."""
    import json  # local import: json is not imported at the top of this file

    raw_prices = market.get('outcomePrices')
    # The field is handled both as a JSON-encoded string such as
    # '["0.65", "0.35"]' and as an already-decoded list.
    if isinstance(raw_prices, str):
        try:
            raw_prices = json.loads(raw_prices)
        except ValueError:
            return None
    if not isinstance(raw_prices, (list, tuple)) or not raw_prices:
        return None
    try:
        return float(raw_prices[0])
    except (TypeError, ValueError):
        return None


def build_analysis_dataframe():
    """
    Build one row per parseable Bitcoin market with its implied value and deviation.

    Fetches events via ``fetch_polymarket_events("Bitcoin")`` and the spot
    price via ``fetch_spot_price("BTC/USDT")``. For each market the first
    outcome price is taken as the probability (presumably the "Yes" outcome
    -- verify against the API's outcome ordering), and the strike/expiry are
    parsed from the market's text.

    Computed columns:
        implied_value = probability * strike
        arb_deviation = spot_price - implied_value

    Markets are skipped when no strike can be parsed or when no usable
    outcome price is present (no placeholder probability is invented).

    Returns:
        A DataFrame with the columns in ``_ANALYSIS_COLUMNS``. It is empty
        (but still has those columns) when the spot price is unavailable or
        no market could be parsed.
    """
    events = fetch_polymarket_events("Bitcoin")
    spot_price = fetch_spot_price("BTC/USDT")

    # fetch_spot_price returns None on failure; the deviation cannot be
    # computed without it, so return an empty frame instead of raising.
    if spot_price is None:
        print("Spot price unavailable; skipping analysis.")
        return pd.DataFrame(columns=_ANALYSIS_COLUMNS)

    data = []

    for event in events or []:
        if not isinstance(event, dict):
            continue
        event_title = event.get('title', '')

        for market in event.get('markets') or []:
            if not isinstance(market, dict):
                continue

            probability = _extract_first_outcome_price(market)
            if probability is None:
                continue

            # Prefer the market's own text so that several markets under one
            # event do not all inherit the event's strike.
            # NOTE(review): assumes market objects expose a 'question' field
            # -- confirm against the Gamma API response.
            title = market.get('question') or event_title
            strike, expiry = parse_strike_and_expiry(title)
            if not strike and title != event_title:
                # Fall back to the event title, which is what was parsed before.
                title = event_title
                strike, expiry = parse_strike_and_expiry(title)
            if not strike:
                continue

            # Implied Price Calculation: P_implied = P_Poly * K
            implied_value = probability * strike

            # Arbitrage Deviation: D = P_Spot - P_implied
            # Example: strike 100k at probability 0.1 implies 10k; with spot
            # at 95k the deviation is 85k.
            arb_deviation = spot_price - implied_value

            data.append({
                "contract_title": title,
                "probability": probability,
                "strike": strike,
                "expiry": expiry,
                "spot_price": spot_price,
                "implied_value": implied_value,
                "arb_deviation": arb_deviation,
            })

    return pd.DataFrame(data, columns=_ANALYSIS_COLUMNS)

In [None]:
# 6. Backtesting Engine

# Columns of the backtest result; declared once so an empty result still has them.
_BACKTEST_COLUMNS = [
    "contract",
    "signal",
    "gross_deviation",
    "estimated_cost",
    "net_profit_est",
]


def run_backtest_engine(df):
    """
    Estimate net profit for every row whose entry signal is active.

    This is a snapshot analysis, not a time-series backtest: each row of
    ``df`` is evaluated once.

    Entry signal: ``arb_deviation > 0`` (spot above implied value).
    For each entry row:
        gross_deviation = arb_deviation
        estimated_cost  = (spot_price + implied_value) * TOTAL_COST_PCT
        net_profit_est  = gross_deviation - estimated_cost

    Args:
        df: DataFrame with ``contract_title``, ``spot_price``,
            ``implied_value`` and ``arb_deviation`` columns. An empty frame
            (with or without columns) is accepted.

    Returns:
        A DataFrame with the columns in ``_BACKTEST_COLUMNS`` and a fresh
        0..n-1 index, one row per entry signal. When there are no entry
        signals the frame is empty but still carries those columns, so
        callers can sort or select by column name without a KeyError.
    """
    if df.empty:
        return pd.DataFrame(columns=_BACKTEST_COLUMNS)

    entries = df[df['arb_deviation'] > 0]
    if entries.empty:
        return pd.DataFrame(columns=_BACKTEST_COLUMNS)

    gross_arb_capture = entries['arb_deviation']  # Dollar value difference

    # Fees are charged on notional size; approximated as the sum of the spot
    # leg and the Polymarket (implied) leg.
    cost = (entries['spot_price'] + entries['implied_value']) * TOTAL_COST_PCT

    results = pd.DataFrame({
        "contract": entries['contract_title'],
        "signal": "ENTRY",
        "gross_deviation": gross_arb_capture,
        "estimated_cost": cost,
        "net_profit_est": gross_arb_capture - cost,
    })
    # Drop the source row labels so the result is indexed 0..n-1.
    return results.reset_index(drop=True)

In [None]:
# Execute Analysis

# Fetch live data, build the analysis frame and report the best candidates.
print("Fetching Data...")
df_market = build_analysis_dataframe()

if df_market.empty:
    print("No matching markets found or parsed correctly.")
else:
    print(f"Analyzed {len(df_market)} markets.")
    print("Running Backtest Simulation...")
    df_backtest = run_backtest_engine(df_market)

    if df_backtest.empty:
        # With no entry signals the result may have no 'net_profit_est'
        # column, so sorting by it would raise a KeyError.
        print("\nNo markets currently satisfy the entry signal.")
    else:
        # Display Top Opportunities
        print("\nTop Potential Arbitrage Opportunities:")
        print(df_backtest.sort_values(by='net_profit_est', ascending=False).head(10))

In [None]:
# 7. Visualization

if not df_market.empty:
    # Order the rows by strike so the x-axis reads left to right.
    df_sorted = df_market.sort_values(by='strike')
    strikes = df_sorted['strike']

    # Single plot with a primary and a secondary y-axis.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Spot price: the same value on every row, drawn as a line.
    spot_trace = go.Scatter(
        x=strikes,
        y=df_sorted['spot_price'],
        name="Spot Price",
        mode='lines',
        line={'color': 'orange'},
    )
    # Implied value per contract, drawn as individual points.
    implied_trace = go.Scatter(
        x=strikes,
        y=df_sorted['implied_value'],
        name="Implied Value (Poly)",
        mode='markers',
        marker={'color': 'blue'},
    )
    # Spot-minus-implied deviation as translucent bars.
    deviation_trace = go.Bar(
        x=strikes,
        y=df_sorted['arb_deviation'],
        name="Arbitrage Deviation ($)",
        opacity=0.3,
    )

    # Prices share the primary axis; the deviation goes on the secondary one.
    axis_assignment = (
        (spot_trace, False),
        (implied_trace, False),
        (deviation_trace, True),
    )
    for trace, on_secondary in axis_assignment:
        fig.add_trace(trace, secondary_y=on_secondary)

    fig.update_layout(
        title_text="Spot vs Implied Price Analysis",
        xaxis_title="Contract Strike Price ($)",
        legend={'x': 0, 'y': 1.1, 'orientation': 'h'},
    )

    fig.update_yaxes(title_text="Price ($)", secondary_y=False)
    fig.update_yaxes(title_text="Deviation Magnitude ($)", secondary_y=True)

    fig.show()