# Polymarket API Filter

For our prediction market aggregator we curate markets that are interesting on polymarket. This is because we are just at a demo level of maturity at this point and need to hold the users hand. 

This notebook provides the tools for searching and filtering interesting markets

In [1]:
import requests
import pandas as pd
from typing import List, Dict, Optional, Any
import time

In [2]:

class PolymarketAPI:
    def __init__(self, base_url="https://gamma-api.polymarket.com"):
        self.base_url = base_url
        self.session = requests.Session()
        self.MAX_RESULTS = 500
        
    def search_events(self, 
                     keywords: Optional[str] = None, 
                     min_liquidity: float = 100, 
                     closed: bool = False, 
                     active: bool = True,
                     limit: int = 100,
                     max_results: int = 500,
                     search_description: bool = True) -> List[Dict]:
        """
        Search for events with pagination support and description search.
        
        Args:
            keywords: Search terms for event title and description
            min_liquidity: Minimum liquidity threshold
            closed: Include closed events
            active: Include active events
            limit: Number of results per page
            max_results: Maximum total results to return
            search_description: Whether to also search in event descriptions
        """
        all_events = []
        current_offset = 0
        max_results = min(max_results, self.MAX_RESULTS)
        
        while len(all_events) < max_results:
            params = {
                "closed": closed,
                "active": active,
                "liquidity_num_min": min_liquidity,
                "limit": limit,
                "offset": current_offset
            }
            
            response = self.session.get(f"{self.base_url}/events", params=params)
            
            if response.status_code != 200:
                raise Exception(f"API request failed with status {response.status_code}")
                
            events = response.json()
            
            if not events:  # No more results
                break
            
            # Filter events based on keywords if provided
            if keywords:
                filtered_events = []
                keywords_lower = keywords.lower()
                for event in events:
                    title = event.get('title', '').lower()
                    description = event.get('description', '').lower() if search_description else ''
                    
                    if keywords_lower in title or (search_description and keywords_lower in description):
                        filtered_events.append(event)
                events = filtered_events
            
            all_events.extend(events)
            current_offset += limit
            time.sleep(0.1)  # Add small delay between requests
            
        return all_events[:max_results]

    def format_event_data(self, event: Dict) -> Dict:
        """Enhanced event data formatting with comprehensive market data extraction"""
        data = {
            'event_id': event.get('id', ''),
            'title': event.get('title', ''),
            'description': event.get('description', ''),
            'liquidity': float(event.get('liquidity', 0)),
            'volume': float(event.get('volume', 0)),
            'volume24hr': float(event.get('volume24hr', 0)),
            'endDate': self.parse_datetime(event.get('endDate')),
            'startDate': self.parse_datetime(event.get('startDate')),
            'createdAt': self.parse_datetime(event.get('createdAt')),
            'updatedAt': self.parse_datetime(event.get('updatedAt')),
            'slug': event.get('slug', ''),
            'url': f"https://polymarket.com/event/{event.get('slug', '')}",
            'active': event.get('active', False),
            'closed': event.get('closed', False),
            'total_markets': len(event.get('markets', [])),
            'markets': []
        }
        
        # Extract tags if present
        if event.get('tags'):
            data['tags'] = [tag.get('label') for tag in event['tags']]
            data['tags_str'] = ', '.join(data['tags'])
        
        if event.get('markets'):
            market_summaries = []
            for market in event['markets']:
                market_data = {
                    'market_id': market.get('id', ''),
                    'question': market.get('question', ''),
                    'condition_id': market.get('conditionId', ''),
                    'yes_token_id': None,
                    'no_token_id': None,
                    'liquidity': float(market.get('liquidity', 0)),
                    'volume': float(market.get('volume', 0)),
                    'volume24hr': float(market.get('volume24hr', 0)),
                    'resolution_source': market.get('resolutionSource', ''),
                    'end_date': self.parse_datetime(market.get('endDate')),
                    'start_date': self.parse_datetime(market.get('startDate')),
                    'active': market.get('active', False),
                    'closed': market.get('closed', False)
                }
                
                # Extract token IDs
                if market.get('clobTokenIds'):
                    try:
                        token_ids = eval(market['clobTokenIds'])
                        market_data['yes_token_id'] = token_ids[0]
                        market_data['no_token_id'] = token_ids[1]
                    except:
                        pass
                
                # Extract prices
                if market.get('outcomePrices'):
                    try:
                        prices = eval(market['outcomePrices'])
                        label = market.get('groupItemTitle', 'option')
                        if label:
                            market_data['label'] = label
                            market_data['yes_price'] = float(prices[0])
                            market_data['no_price'] = float(prices[1])
                            # Also add to main data for backwards compatibility
                            data[f'{label}_yes'] = float(prices[0])
                            data[f'{label}_no'] = float(prices[1])
                    except:
                        pass
                
                data['markets'].append(market_data)
                # Create a summary string for this market
                market_summary = f"{market_data['question']} (Yes: {market_data.get('yes_price', 'N/A'):.3f}, No: {market_data.get('no_price', 'N/A'):.3f})"
                market_summaries.append(market_summary)
            
            # Add a summary of all markets
            data['markets_summary'] = '\n'.join(market_summaries)
                        
        return data

    def analyze_events(self, events: List[Dict]) -> pd.DataFrame:
        """Convert events to DataFrame with enhanced analysis"""
        formatted_events = [self.format_event_data(event) for event in events]
        df = pd.DataFrame(formatted_events)
        
        # Convert numeric columns
        numeric_cols = ['liquidity', 'volume', 'volume24hr', 'total_markets']
        for col in numeric_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').round(2)
        
        # Convert price columns
        price_cols = [col for col in df.columns if col.endswith('_yes') or col.endswith('_no')]
        for col in price_cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce').round(4)
        
        # Reorder columns for better readability
        preferred_order = ['event_id', 'title', 'description', 'markets_summary', 'liquidity', 
                         'volume', 'volume24hr', 'total_markets', 'tags_str', 'startDate', 
                         'endDate', 'createdAt', 'updatedAt', 'active', 'closed', 'url']
        
        # Get all columns that exist in the DataFrame
        available_cols = [col for col in preferred_order if col in df.columns]
        # Add any remaining columns that weren't in preferred_order
        remaining_cols = [col for col in df.columns if col not in available_cols]
        
        df = df[available_cols + remaining_cols]
        
        return df.sort_values('volume', ascending=False)

    def parse_datetime(self, date_str: Optional[str]) -> Optional[pd.Timestamp]:
        """Parse datetime strings in various formats returned by the API."""
        if not date_str:
            return None
            
        try:
            # Handle ISO format with 'Z' timezone indicator
            if date_str.endswith('Z'):
                date_str = date_str[:-1] + '+00:00'
            return pd.to_datetime(date_str, format='ISO8601')
        except:
            try:
                # Fallback to flexible parsing
                return pd.to_datetime(date_str)
            except:
                return None

    def print_event_summary(self, df: pd.DataFrame, include_markets: bool = True) -> None:
        """Print a readable summary of events."""
        print(f"\nFound {len(df)} events:")
        print("-" * 80)
        
        for _, row in df.iterrows():
            print(f"Event: {row['title']}")
            print(f"ID: {row['event_id']}")
            if row.get('description'):
                print(f"Description: {row['description'][:200]}...")
            print(f"Volume: ${row['volume']:,.2f}")
            print(f"Liquidity: ${row['liquidity']:,.2f}")
            if row.get('tags_str'):
                print(f"Tags: {row['tags_str']}")
            print(f"Active: {row['active']}, Closed: {row['closed']}")
            print(f"URL: {row['url']}")
            
            if include_markets and row.get('markets_summary'):
                print("\nMarkets:")
                print(row['markets_summary'])
            
            print("-" * 80)
    def format_market_summary(self, market_data: Dict) -> str:
        """Format market summary with proper price handling"""
        yes_price = market_data.get('yes_price')
        no_price = market_data.get('no_price')
        
        yes_display = f"{yes_price:.3f}" if isinstance(yes_price, (int, float)) else "N/A"
        no_display = f"{no_price:.3f}" if isinstance(no_price, (int, float)) else "N/A"
        
        return f"{market_data['question']} (Yes: {yes_display}, No: {no_display})"

    def format_event_data(self, event: Dict) -> Dict:
        """Enhanced event data formatting with comprehensive market data extraction"""
        data = {
            'event_id': event.get('id', ''),
            'title': event.get('title', ''),
            'description': event.get('description', ''),
            'liquidity': float(event.get('liquidity', 0)),
            'volume': float(event.get('volume', 0)),
            'volume24hr': float(event.get('volume24hr', 0)),
            'endDate': self.parse_datetime(event.get('endDate')),
            'startDate': self.parse_datetime(event.get('startDate')),
            'createdAt': self.parse_datetime(event.get('createdAt')),
            'updatedAt': self.parse_datetime(event.get('updatedAt')),
            'slug': event.get('slug', ''),
            'url': f"https://polymarket.com/event/{event.get('slug', '')}",
            'active': event.get('active', False),
            'closed': event.get('closed', False),
            'total_markets': len(event.get('markets', [])),
            'markets': []
        }
        
        # Extract tags if present
        if event.get('tags'):
            data['tags'] = [tag.get('label') for tag in event['tags']]
            data['tags_str'] = ', '.join(data['tags'])
        
        if event.get('markets'):
            market_summaries = []
            for market in event['markets']:
                market_data = {
                    'market_id': market.get('id', ''),
                    'question': market.get('question', ''),
                    'condition_id': market.get('conditionId', ''),
                    'yes_token_id': None,
                    'no_token_id': None,
                    'liquidity': float(market.get('liquidity', 0)),
                    'volume': float(market.get('volume', 0)),
                    'volume24hr': float(market.get('volume24hr', 0)),
                    'resolution_source': market.get('resolutionSource', ''),
                    'end_date': self.parse_datetime(market.get('endDate')),
                    'start_date': self.parse_datetime(market.get('startDate')),
                    'active': market.get('active', False),
                    'closed': market.get('closed', False)
                }
                
                # Extract token IDs
                if market.get('clobTokenIds'):
                    try:
                        token_ids = eval(market['clobTokenIds'])
                        market_data['yes_token_id'] = token_ids[0]
                        market_data['no_token_id'] = token_ids[1]
                    except:
                        pass
                
                # Extract prices
                if market.get('outcomePrices'):
                    try:
                        prices = eval(market['outcomePrices'])
                        label = market.get('groupItemTitle', 'option')
                        if label:
                            market_data['label'] = label
                            market_data['yes_price'] = float(prices[0])
                            market_data['no_price'] = float(prices[1])
                            # Also add to main data for backwards compatibility
                            data[f'{label}_yes'] = float(prices[0])
                            data[f'{label}_no'] = float(prices[1])
                    except:
                        pass
                
                data['markets'].append(market_data)
                market_summaries.append(self.format_market_summary(market_data))
            
            # Add a summary of all markets
            data['markets_summary'] = '\n'.join(market_summaries)
                        
        return data

In [None]:
# Example usage
api = PolymarketAPI()

# Search for high liquidity election events
events = api.search_events(keywords="eagles", min_liquidity=10000)
df = api.analyze_events(events)

print("\nEvent summary:")
display(df[['event_id', 'title', 'liquidity', 'volume24hr']].tail())

if not df.empty:
    first_event = df.iloc[2]
    print(f"\nMarket details for {first_event['event_id']} event:")
    for market in first_event['markets']:
        print(f"\nMarket ID: {market['market_id']}")
        # print(f"Question: {market['question']}")
        # print(f"Condition ID: {market['condition_id']}")
        # print(f"YES Token ID: {market['yes_token_id']}")
        # print(f"NO Token ID: {market['no_token_id']}")

In [6]:
# Example Max Liquidity
api = PolymarketAPI()

# Search for events 
events = api.search_events(min_liquidity=50000, max_results=500)
df = api.analyze_events(events)

print("\n=== EVENT SUMMARY ===")
print(f"Total Events Found: {len(df)}")
print(f"Total Liquidity Across All Events: ${df['liquidity'].sum():,.2f}")
print(f"Total 24hr Volume Across All Events: ${df['volume24hr'].sum():,.2f}")

# Top 5 by liquidity
print("\nTop 5 Events by Liquidity:")
print("-" * 100)
top_events = df.nlargest(5, 'liquidity')
for idx, event in top_events.iterrows():
    print(f"Title: {event['title']}")
    print(f"Event ID: {event['event_id']}")
    print(f"Liquidity: ${event['liquidity']:,.2f}")
    print(f"24hr Volume: ${event['volume24hr']:,.2f}")
    print("-" * 100)

# Details of highest liquidity event
highest_liquidity_event = df.loc[df['liquidity'].idxmax()]
print("\n=== HIGHEST LIQUIDITY EVENT DETAILS ===")
print(f"Title: {highest_liquidity_event['title']}")
print(f"Event ID: {highest_liquidity_event['event_id']}")
print(f"Liquidity: ${highest_liquidity_event['liquidity']:,.2f}")
print(f"24hr Volume: ${highest_liquidity_event['volume24hr']:,.2f}")

print("\nMarkets in this event:")
for market in highest_liquidity_event['markets']:
    print("\n" + "-" * 50)
    print(f"Market ID: {market['market_id']}")
    print(f"Question: {market['question']}")
    if 'yes_price' in market and 'no_price' in market:
        print(f"Current Prices - Yes: ${market['yes_price']:.3f}, No: ${market['no_price']:.3f}")
    print(f"Market Liquidity: ${market['liquidity']:,.2f}")


print("\n=== MARKETS CLOSEST TO EVEN MONEY ACROSS ALL EVENTS ===")

# Collect all markets across all events
all_markets = []
for _, event in df.iterrows():
    for market in event['markets']:
        if 'yes_price' in market:
            # Calculate distance from 50%
            distance_from_even = abs(0.5 - market['yes_price'])
            all_markets.append({
                'event_title': event['title'],
                'market_id': market['market_id'],
                'question': market['question'],
                'yes_price': market['yes_price'],
                'no_price': market['no_price'],
                'liquidity': market['liquidity'],
                'volume24hr': event['volume24hr'],
                'distance_from_even': distance_from_even
            })

# Sort by distance from even money and filter for liquid markets
sorted_markets = sorted(all_markets, key=lambda x: x['distance_from_even'])

# Print top 10 closest to even money
print("\nTop 10 Most Even Markets:")
for market in sorted_markets[:10]:
    print("-" * 100)
    print(f"Event: {market['event_title']}")
    print(f"Question: {market['question']}")
    print(f"YES: ${market['yes_price']:.3f} ({market['yes_price']*100:.1f}%)")
    print(f"NO:  ${market['no_price']:.3f} ({market['no_price']*100:.1f}%)")
    print(f"Market Liquidity: ${market['liquidity']:,.2f}")
    print(f"24hr Volume: ${market['volume24hr']:,.2f}")


=== EVENT SUMMARY ===
Total Events Found: 500
Total Liquidity Across All Events: $83,269,188.27
Total 24hr Volume Across All Events: $59,350,780.18

Top 5 Events by Liquidity:
----------------------------------------------------------------------------------------------------
Title: Champions League Winner
Event ID: 12585
Liquidity: $7,707,970.43
24hr Volume: $1,896,319.87
----------------------------------------------------------------------------------------------------
Title: Super Bowl Champion 2025
Event ID: 11439
Liquidity: $7,227,205.99
24hr Volume: $1,743,133.19
----------------------------------------------------------------------------------------------------
Title: Premier League Winner
Event ID: 12483
Liquidity: $6,761,785.32
24hr Volume: $472,817.86
----------------------------------------------------------------------------------------------------
Title: NBA Champion
Event ID: 12815
Liquidity: $6,397,733.65
24hr Volume: $760,023.53
---------------------------------------

In [8]:
# Initialize API
api = PolymarketAPI()

# Search for events with minimum liquidity
events = api.search_events(min_liquidity=50000, max_results=500)
df = api.analyze_events(events)

print("\n=== EVENT SUMMARY ===")
print(f"Total Events Found: {len(df)}")
print(f"Total Liquidity Across All Events: ${df['liquidity'].sum():,.2f}")
print(f"Total 24hr Volume Across All Events: ${df['volume24hr'].sum():,.2f}")

# Collect all markets across all events
all_markets = []
for _, event in df.iterrows():
    for market in event['markets']:
        if 'yes_price' in market:
            # Calculate distance from 50%
            distance_from_even = abs(0.5 - market['yes_price'])
            all_markets.append({
                'event_title': event['title'],
                'event_id': event['event_id'],
                'market_id': market['market_id'],
                'question': market['question'],
                'yes_price': market['yes_price'],
                'no_price': market['no_price'],
                'liquidity': market['liquidity'],
                'volume24hr': event['volume24hr'],
                'distance_from_even': distance_from_even,
                'yes_token_id': market.get('yes_token_id'),
                'no_token_id': market.get('no_token_id')
            })

# Filter for markets with minimum liquidity
MIN_MARKET_LIQUIDITY = 50000  # Adjust this threshold as needed
liquid_markets = [m for m in all_markets if m['liquidity'] >= MIN_MARKET_LIQUIDITY]

# Sort by distance from even money
sorted_markets = sorted(liquid_markets, key=lambda x: x['distance_from_even'])

print(f"\n=== TOP MARKETS BY CLOSENESS TO EVEN MONEY (Min Liquidity: ${MIN_MARKET_LIQUIDITY:,}) ===")
for market in sorted_markets[:10]:
    print("\n" + "=" * 100)
    print(f"Event: {market['event_title']}")
    print(f"Event ID: {market['event_id']}")
    print(f"Question: {market['question']}")
    print(f"Market ID: {market['market_id']}")
    print(f"Token IDs:")
    print(f"  YES Token: {market['yes_token_id']}")
    print(f"  NO Token:  {market['no_token_id']}")
    print(f"Odds:")
    print(f"  YES: ${market['yes_price']:.3f} ({market['yes_price']*100:.1f}%)")
    print(f"  NO:  ${market['no_price']:.3f} ({market['no_price']*100:.1f}%)")
    print(f"Market Stats:")
    print(f"  Liquidity: ${market['liquidity']:,.2f}")
    print(f"  24hr Volume: ${market['volume24hr']:,.2f}")
    print(f"  Distance from Even: {market['distance_from_even']*100:.1f}%")


=== EVENT SUMMARY ===
Total Events Found: 500
Total Liquidity Across All Events: $83,722,051.67
Total 24hr Volume Across All Events: $59,278,042.73

=== TOP MARKETS BY CLOSENESS TO EVEN MONEY (Min Liquidity: $50,000) ===

Event: Premier League Winner
Event ID: 12483
Question: Liverpool wins the Premier League?
Market ID: 506747
Token IDs:
  YES Token: 6856495334599070327400479404766108364224261454141327193457127201769494631355
  NO Token:  83900161665955113552429010729973027044975695664005576591523498118851571751116
Odds:
  YES: $0.473 (47.3%)
  NO:  $0.526 (52.6%)
Market Stats:
  Liquidity: $78,280.29
  24hr Volume: $474,437.88
  Distance from Even: 2.7%

Event: What price will Bitcoin hit in December?
Event ID: 14992
Question: Will Bitcoin reach $110,000 in December?
Market ID: 514368
Token IDs:
  YES Token: 1790971926816858923434249332892380360487842724102721934198820945656996137837
  NO Token:  103371696953468657853877736267623387818711712744767220628379059667622124131913
Odds:
  

In [8]:
# Initialize API
api = PolymarketAPI()

# Search for all election-related events
events = api.search_events(
    keywords="nfl", 
    min_liquidity=1000,
    max_results=500,
    search_description=True
)

# Convert to DataFrame with enhanced data
df = api.analyze_events(events)

# Print a nice summary
api.print_event_summary(df)

# Show specific columns
display(df[['event_id', 'token_id','title', 'liquidity', 'volume', 'total_markets', 'tags_str']].head(10))

['57613675663002786187100495641703103623912769470725101760865454712003201464678', '67641413617747536816198885240218904475506067596754599591449910952773445575688']
['15111941596932895954677772272152206580074877033299131921872776208279526696528', '10333729307966892400283245646002016244400388166966478557176425492447287135958']
['108088740483782318629738128403170575886134396631626833192005694005897326590536', '81048626842252155075408558219969802310092673640514672742299690414978571138044']
['27022372368383543064481750859855269165672105698623980398313154062641542552737', '94513685885046596237970421727185907352023933489194590852315431213856189045613']
['50081890939427596846356759443654884083585178151602920179968167301313381484122', '110980129781644931683328732365235837300976259753815433967742702677995780751425']
['67077938469318346081196615742488162829708899297173997697166881390765322577705', '88121205240930498431532174504276013496130487256624068851886620153864182247811']
['987770406476769356

KeyError: "['token_id'] not in index"