# Polymarket Historical Data Scripts

Scripts to help download historical data from Polymarket from the Polymarket Analytics API

> Note:
> Polymarket Analytics doesn't have an official API but I was able to infer it by looking at the requests the sit was making.

> TODO:
> Should probably put everything into Claude and have it print out API docs

In [13]:
import traceback
import requests  
import pandas as pd
import os
from datetime import datetime
import copy
from src.config import config

ACTIVITY_TRADES_URL = f"{config.POLYMARKET_API_KEY}/api/activity-trades"


HEADERS = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.9",
    "Content-Type": "application/json",
    "Cookie": f"privy-session=t; privy-token={config.POLYMARKET_PRIVY_TOKEN}",
    "Origin": "https://polymarketanalytics.com",
    "Priority": "u=1, i",
    "Sec-Ch-Ua": '"Not/A)Brand";v="8", "Chromium";v="126"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Linux"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
}




def fetch_market_prices(market_id):
    url = "https://polymarketanalytics.com/api/markets-prices"
    data = {"event_id":market_id}

    headers = copy.copy(HEADERS)
    headers["Referer"] = f"https://polymarketanalytics.com/markets/{market_id}"
    
    try:
        response = requests.post(url, headers=headers, json=data)
        response.raise_for_status()
        
        json_data = response.json()
        trades = json_data.get('data', [])
        
        df = pd.DataFrame(trades)
        # Convert trade_dttm to datetime for proper sorting and analysis
        #df['trade_dttm'] = pd.to_datetime(df['trade_dttm'])
        return df
        
    except Exception as e:
        print(f"  Error fetching {start_date} to {end_date}: {e}")
        return []
    

def fetch_positions(trader_id):
    url = "https://polymarketanalytics.com/api/traders-positions-history"
    data = {"trader_id": trader_id}

    headers = copy.copy(HEADERS)
    headers["Referer"] = f"https://polymarketanalytics.com/traders/{trader_id}"
    
    try:
        response = requests.post(url, headers=headers, json=data)
        response.raise_for_status()
        
        json_data = response.json()
        trades = json_data.get('data', [])
        
        df = pd.DataFrame(trades)
        # Convert trade_dttm to datetime for proper sorting and analysis
        df['trade_dt'] = pd.to_datetime(df['trade_dt'])
        df['insert_time'] = pd.to_datetime(df['insert_time'])
        return df
        
    except Exception as e:
        print(f"  Error fetching {start_date} to {end_date}: {e}")
        return []

def fetch_user_trades_for_date_range(trader_id, start_date, end_date):
    """
    Fetch trades for a specific date range.
    
    Args:
        trader_id (str): Trader ID to fetch data for
        start_date (str): Start date in YYYY-MM-DD format
        end_date (str): End date in YYYY-MM-DD format
        headers (dict): Request headers
    
    Returns:
        list: List of trade records
    """
    url = "https://polymarketanalytics.com/api/activity-trades"
    
    data = {
        "trader_id": trader_id,
        "sortBy": "trade_dttm",
        "sortDesc": True,
        "start_date": start_date,
        "end_date": end_date
    }
    
    try:
        response = requests.post(url, headers=HEADERS, json=data)
        response.raise_for_status()
        
        json_data = response.json()
        trades = json_data.get('data', [])
        
        print(f"  {start_date} to {end_date}: {len(trades)} trades")
        df = pd.DataFrame(trades)
        # Convert trade_dttm to datetime for proper sorting and analysis
        df['trade_dttm'] = pd.to_datetime(df['trade_dttm'])
        return df
        
    except Exception as e:
        print(f"  Error fetching {start_date} to {end_date}: {e}")
        return []


def fetch_market_trades_for_date_range(event_id, start_date, end_date):
    """
    Fetch trades for a specific date range.
    
    Args:
        event_id (str): Event id
        start_date (str): Start date in YYYY-MM-DD format
        end_date (str): End date in YYYY-MM-DD format
        headers (dict): Request headers
    
    Returns:
        list: List of trade records
    """
    url = "https://polymarketanalytics.com/api/activity-trades"
    
    data = {
        "event_id": event_id,
        "sortBy": "trade_dttm",
        "sortDesc": True,
        "start_date": start_date,
        "end_date": end_date
    }
    
    try:
        response = requests.post(url, headers=HEADERS, json=data)
        response.raise_for_status()
        
        json_data = response.json()
        trades = json_data.get('data', [])
        
        print(f"  {start_date} to {end_date}: {len(trades)} trades")
        df = pd.DataFrame(trades)
        # Convert trade_dttm to datetime for proper sorting and analysis
        df['trade_dttm'] = pd.to_datetime(df['trade_dttm'])
        return df
        
    except Exception as e:
        print(f"  Error fetching {start_date} to {end_date}: {e}")
        return []

#d = fetch_market_trades_for_date_range("16403", "2025-01-10 04:01:27", "2025-01-10 05:01:28")
# Example of fetching specific date range:

def fetch_price_history(clob_id, start_date=None, end_date=None, interval=None, fidelity=1):
    """
    Get price history for a specific token.
    
    Args:
        clob_id (str): CLOB token ID
        start_ts (int): Start timestamp (Unix, UTC)
        end_ts (int): End timestamp (Unix, UTC)
        interval (str): Time interval ("1m", "1w", "1d", "6h", "1h", "max")
        fidelity (int): Resolution in minutes (1 = 1 minute intervals)
    
    Returns:
        pd.DataFrame: Price history data
    """
    
    headers = {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (compatible; PolymarketAPI/1.0)'
    }
    
    url = "https://clob.polymarket.com/prices-history"
    
    params = {
        'market': clob_id,
        'fidelity': fidelity
    }
    
    # Use either timestamp range OR interval (mutually exclusive)
    if start_date and end_date:
        params['startTs'] = int(datetime.strptime(start_date, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc).timestamp())
        params['endTs'] = int(datetime.strptime(end_date, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc).timestamp())
        
        #params['startTs'] = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp())
        #params['endTs'] = int(datetime.strptime(end_date, "%Y-%m-%d").timestamp())
    elif interval:
        params['interval'] = interval
    else:
        params['interval'] = 'all'
    
    try:
        print(f"Fetching price history for token {clob_id}...")
        print(f"Parameters: {params}")
        
        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()
        
        data = response.json()
        
        if 'history' in data and data['history']:
            # Convert to DataFrame
            df = pd.DataFrame(data['history'])
            
            # Convert timestamp to datetime
            df['datetime'] = pd.to_datetime(df['t'], unit='s')
            df['price'] = df['p']
            
            # Sort by time
            df = df.sort_values('datetime').reset_index(drop=True)
            
            print(f"Retrieved {len(df)} price points")
            print(f"Date range: {df['datetime'].min()} to {df['datetime'].max()}")
            
            return df[['datetime', 'price', 't', 'p']]
        
        else:
            print("No price history data found")
            return pd.DataFrame()
            
    except Exception as e:
        print(f"Error fetching price history: {e}")
        return pd.DataFrame()


def fetch_events():
    url = f"https://gamma-api.polymarket.com/events?closed=true&include_chat=false"
    
    try:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        
        #return response.json()

        return pd.DataFrame(response.json())

    except Exception as e:
        print(e)
        return []

def fetch_event(slug):
    url = f"https://gamma-api.polymarket.com/events/slug/{slug}?include_chat=false"
    
    try:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        
        return response.json()
    except Exception as e:
        print(e)
        return []


fetch_events().columns

Index(['id', 'ticker', 'slug', 'title', 'description', 'resolutionSource',
       'startDate', 'creationDate', 'endDate', 'image', 'icon', 'active',
       'closed', 'archived', 'new', 'featured', 'restricted', 'liquidity',
       'volume', 'openInterest', 'sortBy', 'category', 'published_at',
       'createdAt', 'updatedAt', 'competitive', 'volume24hr', 'volume1wk',
       'volume1mo', 'volume1yr', 'liquidityAmm', 'liquidityClob',
       'commentCount', 'markets', 'series', 'tags', 'cyom', 'closedTime',
       'showAllOutcomes', 'showMarketImages', 'enableNegRisk', 'seriesSlug',
       'negRiskAugmented', 'pendingDeployment', 'deploying', 'subcategory',
       'updatedBy', 'commentsEnabled'],
      dtype='object')

In [140]:
import json
import pandas as pd

event = fetch_event("mlb-cin-chc-2025-05-31")

flattened = [
    {'clobTokenId': token_id, 
     'outcome': outcome,
     'conditionId': m['conditionId'], 
     'gameStartTime': m['gameStartTime'], 
     'marketId': m['id'],
     'marketStartDate': m['startDate'], 
     'marketEndDate': m['endDate'], 
     'startDate': event['startDate'],
     'endDate': event['endDate'],
     'gameStartTime': m['gameStartTime'], 
     'slug': event['slug'], 
     'title': event['title']
    }
    for m in event['markets'] 
    if 'clobTokenIds' in m
    for token_id, outcome in zip(json.loads(m['clobTokenIds']), json.loads(m['outcomes']))
]

a = [fetch_price_history(x['clobTokenId']).assign(**x) for x in flattened]
combined = pd.concat(a, join='inner')

combined

Fetching price history for token 3300671900186020957198642358934489067292273503243368402087506893833088463182...
Parameters: {'market': '3300671900186020957198642358934489067292273503243368402087506893833088463182', 'fidelity': 1, 'interval': 'all'}
Retrieved 233 price points
Date range: 2025-05-30 08:20:07 to 2025-05-31 23:00:07
Fetching price history for token 14315604171938534486956636006407315271580153618606169226574246432618720078259...
Parameters: {'market': '14315604171938534486956636006407315271580153618606169226574246432618720078259', 'fidelity': 1, 'interval': 'all'}
Retrieved 233 price points
Date range: 2025-05-30 08:20:07 to 2025-05-31 23:00:07


Unnamed: 0,datetime,price,t,p,clobTokenId,outcome,conditionId,gameStartTime,marketId,marketStartDate,marketEndDate,startDate,endDate,slug,title
0,2025-05-30 08:20:07,0.5000,1748593207,0.5000,3300671900186020957198642358934489067292273503...,Reds,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
1,2025-05-30 08:30:07,0.5000,1748593807,0.5000,3300671900186020957198642358934489067292273503...,Reds,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
2,2025-05-30 08:40:07,0.5000,1748594407,0.5000,3300671900186020957198642358934489067292273503...,Reds,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
3,2025-05-30 08:50:07,0.5000,1748595007,0.5000,3300671900186020957198642358934489067292273503...,Reds,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
4,2025-05-30 09:00:08,0.5000,1748595608,0.5000,3300671900186020957198642358934489067292273503...,Reds,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
228,2025-05-31 22:20:07,0.9995,1748730007,0.9995,1431560417193853448695663600640731527158015361...,Cubs,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
229,2025-05-31 22:30:07,0.9995,1748730607,0.9995,1431560417193853448695663600640731527158015361...,Cubs,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
230,2025-05-31 22:40:07,0.9995,1748731207,0.9995,1431560417193853448695663600640731527158015361...,Cubs,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs
231,2025-05-31 22:50:07,0.9995,1748731807,0.9995,1431560417193853448695663600640731527158015361...,Cubs,0x6d6f388a856a387f19a5c9db3dda811f8bc3e46f99e0...,2025-05-31 18:20:00+00,548355,2025-05-30T08:01:01.277266Z,2025-06-07T18:20:00Z,2025-05-30T08:02:35.864221Z,2025-05-31T18:20:00Z,mlb-cin-chc-2025-05-31,Reds vs. Cubs


In [106]:
timestamp = datetime.now().strftime("%Y%m%d%H%M")
 # Create data directory if it doesn't exist
data_dir = "data"
os.makedirs(data_dir, exist_ok=True)
csv_filename = os.path.join('data', f"poly_market_prices_{d['event_id'][0]}_{timestamp}.csv")

# Save to CSV file
#csv_filename = f"polymarket_trades_{data['trader_id'][:8]}.csv"
d.to_csv(csv_filename, index=False)
print(f"\nData saved to: {csv_filename}")


NameError: name 'd' is not defined

In [61]:
import requests
import pandas as pd
import os
from datetime import datetime, timedelta
import time


def generate_date_ranges(start_date, end_date, chunk_hours=2):
    """
    Generate date ranges for iteration in hour chunks.
    
    Args:
        start_date (str): Overall start date (YYYY-MM-DD)
        end_date (str): Overall end date (YYYY-MM-DD)
        chunk_hours (int): Number of hours per chunk
    
    Returns:
        list: List of (start_datetime, end_datetime) tuples in ISO format
    """
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d").replace(hour=23, minute=59, second=59)
    
    date_ranges = []
    current_start = start
    
    while current_start < end:
        current_end = min(current_start + timedelta(hours=chunk_hours), end)
        date_ranges.append((
            current_start.strftime("%Y-%m-%dT%H:%M:%S"),
            current_end.strftime("%Y-%m-%dT%H:%M:%S")
        ))
        current_start = current_end + timedelta(seconds=1)  # Move to next second to avoid overlap
    
    return date_ranges


In [59]:
def fetch_all_trader_data(trader_id, start_date, end_date, chunk_hours=2, delay=0.5):
    """
    Fetch all trading data for a trader across a date range, using chunked requests.
    
    For very active traders, try 1-hour chunks
    df = fetch_all_trader_data(
        trader_id=trader_id,
        start_date="2025-05-30",
        end_date="2025-05-30",
        chunk_hours=1,
        delay=0.3  # Faster requests for smaller chunks
    )
     
    Args:
        trader_id (str): Trader ID to fetch data for
        start_date (str): Start date (YYYY-MM-DD)
        end_date (str): End date (YYYY-MM-DD)
        chunk_hours (int): Hours per API request (default: 2)
        delay (float): Delay between requests in seconds

    
    
    Returns:
        pd.DataFrame: Combined DataFrame with all trades
    """
    
    # Request headers (update with your current session)
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9",
        "Content-Type": "application/json",
        "Cookie": f"privy-session=t; privy-token={config.POLYMARKET_PRIVY_TOKEN}",
        "Origin": "https://polymarketanalytics.com",
        "Priority": "u=1, i",
        "Referer": "https://polymarketanalytics.com/traders/0xd218e474776403a330142299f7796e8ba32eb5c9",
        "Sec-Ch-Ua": '"Not/A)Brand";v="8", "Chromium";v="126"',
        "Sec-Ch-Ua-Mobile": "?0",
        "Sec-Ch-Ua-Platform": '"Linux"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"
    }
    
    print(f"Fetching data for trader {trader_id[:10]}... from {start_date} to {end_date}")
    print(f"Using {chunk_hours}-hour chunks with {delay}s delay between requests")
    
    # Generate date ranges
    date_ranges = generate_date_ranges(start_date, end_date, chunk_hours)
    print(f"Will make {len(date_ranges)} API requests")
    
    all_trades = []
    
    for i, (range_start, range_end) in enumerate(date_ranges, 1):
        print(f"Request {i}/{len(date_ranges)}: {range_start} to {range_end}", end=" ")
        
        trades = fetch_trades_for_date_range(trader_id, range_start, range_end, headers)
        all_trades.extend(trades)
        
        # Add delay between requests to be respectful
        if i < len(date_ranges):
            time.sleep(delay)
        
        # Progress update every 50 requests
        if i % 50 == 0:
            print(f"\n  --> Progress: {i}/{len(date_ranges)} requests completed, {len(all_trades)} total trades so far")
    
    print(f"\nTotal trades collected: {len(all_trades)}")
    
    if not all_trades:
        print("No trades found!")
        return pd.DataFrame()
    
    # Convert to DataFrame
    df = pd.DataFrame(all_trades)
    df['trade_dttm'] = pd.to_datetime(df['trade_dttm'])
    
    # Remove any duplicates (in case of overlapping date ranges)
    initial_count = len(df)
    df = df.drop_duplicates(subset=['trade_dttm', 'trader_id', 'amount', 'price', 'market_title', 'outcome'])
    final_count = len(df)
    
    if initial_count != final_count:
        print(f"Removed {initial_count - final_count} duplicate trades")
    
    # Sort by trade datetime
    df = df.sort_values('trade_dttm', ascending=False).reset_index(drop=True)
    
    print(f"Final dataset: {len(df)} unique trades")
    print(f"Date range: {df['trade_dttm'].min()} to {df['trade_dttm'].max()}")
    
    return df



fetch_all_trader_data(
    trader_id="0xb49f468c15c49783f2664c7198a4949ade1b12e6",
    start_date="2025-05-29",
    end_date="2025-05-30",
    chunk_hours=3,
    delay=0.3  # Faster requests for smaller chunks
)

Fetching data for trader 0xb49f468c... from 2025-05-29 to 2025-05-30
Using 3-hour chunks with 0.3s delay between requests
Will make 16 API requests
Request 1/16: 2025-05-29T00:00:00 to 2025-05-29T03:00:00 

TypeError: fetch_trades_for_date_range() takes 3 positional arguments but 4 were given

In [24]:
import os
from datetime import datetime

def create_csv(dataframe):
    timestamp = datetime.now().strftime("%Y%m%d%H%M")
     # Create data directory if it doesn't exist
    data_dir = "data"
    os.makedirs(data_dir, exist_ok=True)
    csv_filename = os.path.join('data', f"poly_user_activity_trades_{data['trader_id'][:8]}_{timestamp}.csv")
    
    # Save to CSV file
    #csv_filename = f"polymarket_trades_{data['trader_id'][:8]}.csv"
    df.to_csv(csv_filename, index=False)
    print(f"\nData saved to: {csv_filename}")

In [27]:
# Save data to CSVs for each trader

trader_ids = ["0xb49f468c15c49783f2664c7198a4949ade1b12e6", "0xd218e474776403a330142299f7796e8ba32eb5c9", "0xe3726a1b9c6ba2f06585d1c9e01d00afaedaeb38"]

df = fetch_activity_trades(id)

for id in trader_ids:
    df = fetch_activity_trades(id)
    # Convert trade_dttm to datetime for proper sorting and analysis
    df['trade_dttm'] = pd.to_datetime(df['trade_dttm'])
    
    # Find earliest and latest trade times
    earliest_trade = df['trade_dttm'].min()
    latest_trade = df['trade_dttm'].max()
    
    print(f"\nTrade time range:")
    print(f"Earliest trade: {earliest_trade}")
    print(f"Latest trade:   {latest_trade}")
    print(f"Time span:      {latest_trade - earliest_trade}")
    create_csv(df)

Status Code: 200
Number of trades returned: 1000

DataFrame shape: (1000, 13)

Trade time range:
Earliest trade: 2025-05-26 18:25:17
Latest trade:   2025-05-30 17:50:15
Time span:      3 days 23:24:58

Data saved to: data/poly_user_activity_trades_0xb49f46_202505301116.csv
Status Code: 200
Number of trades returned: 1000

DataFrame shape: (1000, 13)

Trade time range:
Earliest trade: 2025-05-30 13:34:41
Latest trade:   2025-05-30 18:14:43
Time span:      0 days 04:40:02

Data saved to: data/poly_user_activity_trades_0xb49f46_202505301116.csv
Status Code: 200
Number of trades returned: 1000

DataFrame shape: (1000, 13)

Trade time range:
Earliest trade: 2025-05-30 12:09:24
Latest trade:   2025-05-30 18:13:59
Time span:      0 days 06:04:35

Data saved to: data/poly_user_activity_trades_0xb49f46_202505301116.csv


In [31]:
# Get all files in data
trader_ids = ["0xb49f468c15c49783f2664c7198a4949ade1b12e6", "0xd218e474776403a330142299f7796e8ba32eb5c9", "0xe3726a1b9c6ba2f06585d1c9e01d00afaedaeb38"]

fetch_activity_trades(trader_ids[0])
all_files = os.listdir('data')
files = [f for f in all_files if f.endswith('.csv')]

['poly_user_activity_trades_0xb49f46_202505301114.csv',
 'poly_user_activity_trades_0xb49f46_202505301115.csv',
 'poly_user_activity_trades_0xb49f46_202505301116.csv',
 'poly_user_activity_trades_0xb49f46_202505301041.csv',
 'poly_user_activity_trades_0xd218e4_202505301037.csv']

In [None]:
import requests
import pandas as pd
import os
from datetime import datetime, timedelta
import time






def save_historical_data(trader_id, start_date, end_date, chunk_hours=2):
    """
    Fetch and save historical trading data for a trader.
    
    Args:
        trader_id (str): Trader ID
        start_date (str): Start date (YYYY-MM-DD)
        end_date (str): End date (YYYY-MM-DD)
        chunk_hours (int): Hours per request
    
    Returns:
        str: Filename of saved CSV
    """
    
    # Fetch the data
    df = fetch_all_trader_data(trader_id, start_date, end_date, chunk_hours)
    
    if df.empty:
        return None
    
    # Save to CSV
    data_dir = "data"
    os.makedirs(data_dir, exist_ok=True)
    
    timestamp = datetime.now().strftime("%Y%m%d%H%M")
    filename = f"polymarket_trades_{trader_id[:8]}_{start_date}_{end_date}_{timestamp}.csv"
    filepath = os.path.join(data_dir, filename)
    
    df.to_csv(filepath, index=False)
    print(f"\nData saved to: {filepath}")
    
    return filename

# Example usage:
# 
# # Fetch 1 day of data in 2-hour chunks (12 requests)
# trader_id = "0xd218e474776403a330142299f7796e8ba32eb5c9"
# filename = save_historical_data(
#     trader_id=trader_id,
#     start_date="2025-05-29", 
#     end_date="2025-05-30",
#     chunk_hours=2
# )
# 
# # For very active traders, try 1-hour chunks
# df = fetch_all_trader_data(
#     trader_id=trader_id,
#     start_date="2025-05-30",
#     end_date="2025-05-30",
#     chunk_hours=1,
#     delay=0.3  # Faster requests for smaller chunks
# )

In [None]:
import pandas as pd
import numpy as np

def analyze_market_maker_characteristics(df):
    
    """
    Analyze trading patterns to identify potential market makers.
    
    Args:
        df (pd.DataFrame): DataFrame with trade data
    
    Returns:
        pd.DataFrame: Analysis results by trader
    """
    
    # Group by trader for analysis
    trader_stats = df.groupby('trader_id').agg({
        'amount': ['count', 'mean', 'std', 'sum'],
        'value': ['mean', 'sum'],
        'side': lambda x: list(x),
        'market_title': 'nunique',
        'outcome': lambda x: list(x),
        'price': ['mean', 'std'],
        'trade_dttm': ['min', 'max']
    }).round(4)
    
    # Flatten column names
    trader_stats.columns = ['_'.join(col).strip() for col in trader_stats.columns]
    
    # Calculate market maker indicators
    mm_indicators = []
    
    for trader_id, row in trader_stats.iterrows():
        trader_trades = df[df['trader_id'] == trader_id]
        
        # 1. LARGE ORDER SIZES
        avg_trade_size = row['amount_mean']
        trade_size_percentile = (trader_trades['amount'] >= trader_trades['amount'].quantile(0.8)).mean()
        
        # 2. BOTH SIDES OF TRADE
        sides = trader_trades['side'].value_counts()
        buy_ratio = sides.get('buy', 0) / len(trader_trades)
        sell_ratio = sides.get('sell', 0) / len(trader_trades)
        both_sides_score = 1 - abs(buy_ratio - sell_ratio)  # Closer to 1 = more balanced
        
        # 3. DIVERSIFICATION (across markets and outcomes)
        unique_markets = row['market_title_nunique']
        total_trades = row['amount_count']
        market_diversification = unique_markets / total_trades if total_trades > 0 else 0
        
        # Outcome diversification within markets
        outcome_balance_scores = []
        for market in trader_trades['market_title'].unique():
            market_trades = trader_trades[trader_trades['market_title'] == market]
            if len(market_trades) > 1:
                outcome_counts = market_trades['outcome'].value_counts()
                # Calculate balance between Yes/No outcomes
                yes_ratio = outcome_counts.get('Yes', 0) / len(market_trades)
                no_ratio = outcome_counts.get('No', 0) / len(market_trades)
                balance_score = 1 - abs(yes_ratio - no_ratio)
                outcome_balance_scores.append(balance_score)
        
        avg_outcome_balance = np.mean(outcome_balance_scores) if outcome_balance_scores else 0
        
        # 4. TRADING FREQUENCY
        date_range = (pd.to_datetime(row['trade_dttm_max']) - pd.to_datetime(row['trade_dttm_min'])).days
        trades_per_day = total_trades / max(date_range, 1)
        
        # 5. POSITION SIZE RELATIVE TO VOLUME
        # Calculate net position vs total volume for each market
        position_ratios = []
        for market in trader_trades['market_title'].unique():
            market_trades = trader_trades[trader_trades['market_title'] == market]
            
            # Calculate net position (buys - sells) vs total volume
            total_volume = market_trades['amount'].sum()
            buy_volume = market_trades[market_trades['side'] == 'buy']['amount'].sum()
            sell_volume = market_trades[market_trades['side'] == 'sell']['amount'].sum()
            net_position = abs(buy_volume - sell_volume)
            
            if total_volume > 0:
                position_ratio = net_position / total_volume
                position_ratios.append(position_ratio)
        
        avg_position_ratio = np.mean(position_ratios) if position_ratios else 1
        
        # 6. PRICE SPREAD BEHAVIOR
        # Look for trades at different price levels (suggesting liquidity provision)
        price_std = row['price_std'] if not pd.isna(row['price_std']) else 0
        price_diversity = min(price_std * 10, 1)  # Normalize to 0-1 scale
        
        # COMPOSITE MARKET MAKER SCORE
        # Weight different factors
        mm_score = (
            min(trade_size_percentile * 0.15, 0.15) +  # Large orders (15%)
            both_sides_score * 0.25 +                   # Both sides (25%)
            min(market_diversification * 5, 0.20) +     # Market diversity (20%)
            avg_outcome_balance * 0.15 +                # Outcome balance (15%)
            min(trades_per_day / 10, 0.15) +            # Frequency (15%)
            (1 - avg_position_ratio) * 0.10             # Low net positions (10%)
        )
        
        mm_indicators.append({
            'trader_id': trader_id,
            'total_trades': total_trades,
            'avg_trade_size': avg_trade_size,
            'both_sides_score': both_sides_score,
            'buy_ratio': buy_ratio,
            'sell_ratio': sell_ratio,
            'unique_markets': unique_markets,
            'market_diversification': market_diversification,
            'avg_outcome_balance': avg_outcome_balance,
            'trades_per_day': trades_per_day,
            'avg_position_ratio': avg_position_ratio,
            'price_diversity': price_diversity,
            'mm_score': mm_score,
            'likely_mm': mm_score > 0.6  # Threshold for market maker classification
        })
    
    return pd.DataFrame(mm_indicators).sort_values('mm_score', ascending=False)

def identify_market_makers(df, top_n=10):
    """
    Identify top potential market makers from trade data.
    
    Args:
        df (pd.DataFrame): Trade data
        top_n (int): Number of top market makers to return
    
    Returns:
        pd.DataFrame: Top market makers with their characteristics
    """
    
    # Ensure trade_dttm is datetime
    if 'trade_dttm' in df.columns:
        df['trade_dttm'] = pd.to_datetime(df['trade_dttm'])
    
    # Analyze all traders
    mm_analysis = analyze_market_maker_characteristics(df)
    
    # Filter for likely market makers
    likely_mms = mm_analysis[mm_analysis['likely_mm'] == True]
    
    print(f"Found {len(likely_mms)} likely market makers out of {len(mm_analysis)} total traders")
    print(f"\nTop {top_n} Market Makers:")
    print("=" * 80)
    
    top_mms = mm_analysis.head(top_n)
    
    for i, (_, trader) in enumerate(top_mms.iterrows(), 1):
        print(f"{i:2d}. Trader: {trader['trader_id'][:10]}...")
        print(f"    MM Score: {trader['mm_score']:.3f}")
        print(f"    Total Trades: {trader['total_trades']:,}")
        print(f"    Both Sides Score: {trader['both_sides_score']:.3f} (Buy: {trader['buy_ratio']:.2%}, Sell: {trader['sell_ratio']:.2%})")
        print(f"    Markets: {trader['unique_markets']}, Avg Position Ratio: {trader['avg_position_ratio']:.3f}")
        print(f"    Trades/Day: {trader['trades_per_day']:.1f}")
        print()
    
    return mm_analysis

# Example usage:
# df = read_polymarket_csv('your_file.csv')
# market_makers = identify_market_makers(df, top_n=15)
# 
# # View detailed results
# print(market_makers[['trader_id', 'mm_score', 'total_trades', 'both_sides_score', 'unique_markets']].head(10))