### An attempt at creating an automated market scraper for all currently open and closed markets
#### Multi-threaded event parser (excluding MVE: sports parlays)

In [6]:
import requests

def get_clean_markets(status="open"):
    """Fetch single-leg markets only, excluding MVE combos.

    Pages through the markets endpoint using the ``cursor`` value returned
    by each response until the API stops returning a cursor or a page
    comes back empty.

    Args:
        status: Value sent as the ``status`` query parameter.

    Returns:
        A list with the ``markets`` entries from every page, in the order
        they were received.

    Raises:
        requests.HTTPError: If any page returns a 4xx/5xx response.
        requests.Timeout: If a request does not complete within 30 seconds.
    """
    base_url = "https://api.elections.kalshi.com/trade-api/v2/markets"
    all_markets = []
    cursor = None

    # One session for the whole crawl so the connection is reused across
    # pages and released when pagination ends.
    with requests.Session() as session:
        while True:
            params = {
                "limit": 1000,
                "status": status,
                "mve_filter": "exclude",  # Filter out multivariate/parlay markets
            }
            if cursor:
                params["cursor"] = cursor

            # Without a timeout a stalled connection would block forever.
            response = session.get(base_url, params=params, timeout=30)
            # Fail loudly: an error body has no "markets" key, which would
            # otherwise look like "no more pages" and return partial data.
            response.raise_for_status()
            data = response.json()

            markets = data.get("markets", [])
            all_markets.extend(markets)

            cursor = data.get("cursor")
            if not cursor or not markets:
                break

    return all_markets

# Fetch every market with status "open"; this performs the paginated HTTP
# requests as soon as the cell runs.
markets = get_clean_markets("open")

# Now titles should be clean (presumably free of MVE/parlay combos, given the
# mve_filter above). Print "ticker: title" for at most the first 1000 markets.
for m in markets[:1000]:
    print(f"{m['ticker']}: {m['title']}")

KXBSLGAME-26FEB141000BJKBRS-BRS: Besiktas JK vs Bursaspor Winner?
KXBSLGAME-26FEB141000BJKBRS-BJK: Besiktas JK vs Bursaspor Winner?
KXBSLGAME-26FEB140730TOFBCB-TOF: Tofas SK Bursa vs Buyukcekmece Basketbol Winner?
KXBSLGAME-26FEB140730TOFBCB-BCB: Tofas SK Bursa vs Buyukcekmece Basketbol Winner?
KXBSLGAME-26FEB140500BAHESE-ESE: Bahcesehir Koleji vs Esenler Erokspor Winner?
KXBSLGAME-26FEB140500BAHESE-BAH: Bahcesehir Koleji vs Esenler Erokspor Winner?
KXBBSERIEAGAME-26FEB141400NAPTRI-TRI: Napoli Basket vs Pallacanestro Trieste 2004 Winner?
KXBBSERIEAGAME-26FEB141400NAPTRI-NAP: Napoli Basket vs Pallacanestro Trieste 2004 Winner?
KXBBSERIEAGAME-26FEB141215PALVAR-VAR: Pallacanestro Brescia vs Pallacanestro Varese Winner?
KXBBSERIEAGAME-26FEB141215PALVAR-PAL: Pallacanestro Brescia vs Pallacanestro Varese Winner?
KXGBLGAME-26FEB141115AEKPAO-PAO: BC AEK Athens vs PAOK BC Winner?
KXGBLGAME-26FEB141115AEKPAO-AEK: BC AEK Athens vs PAOK BC Winner?
KXGBLGAME-26FEB141115KARMAR-MAR: AS Karditsas vs B

In [None]:
"""
Kalshi Events Scraper
Pulls all open, closed, and settled events using multi-threading and saves to CSV.

Usage:
    python kalshi_scraper.py
    
Output:
    kalshi_events_YYYYMMDD_HHMMSS.csv
"""

import requests
import csv
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Events listing endpoint; fetch_events_page sends every request to this URL.
BASE_URL = "https://api.elections.kalshi.com/trade-api/v2/events"

def get_session() -> requests.Session:
    """Build a ``requests`` session whose HTTPS requests are retried.

    The retry policy allows up to 3 retries with a backoff factor of 0.5
    and treats 429 and 500/502/503/504 responses as retryable.

    Returns:
        A new session with the retrying adapter mounted for ``https://``.
    """
    retry_policy = Retry(
        total=3,
        backoff_factor=0.5,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    http = requests.Session()
    # Only the https:// prefix gets the retrying adapter.
    http.mount("https://", HTTPAdapter(max_retries=retry_policy))
    return http

def fetch_events_page(session: requests.Session, status: str = None, cursor: str = None) -> dict:
    """Request one page of events and return the decoded JSON body.

    Args:
        session: Session used to issue the GET request.
        status: Optional status filter; omitted from the query when falsy.
        cursor: Optional pagination cursor; omitted from the query when falsy.

    Returns:
        The parsed JSON response.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx.
    """
    # Events API max is likely 200, not 1000
    query = {"limit": 200}
    optional_filters = {"status": status, "cursor": cursor}
    query.update({key: value for key, value in optional_filters.items() if value})

    reply = session.get(BASE_URL, params=query, timeout=30)
    reply.raise_for_status()
    return reply.json()

def fetch_all_events_for_status(status: str = None) -> list:
    """Fetch all events for a given status, following pagination cursors.

    Each returned event dict is tagged in place with a ``scrape_status``
    key holding the requested status (or ``"all"`` when no status is given).

    Args:
        status: Status filter passed to the API. If None (or empty), no
            filter is sent and every event is fetched.

    Returns:
        A list of event dicts from every page, in the order received.

    Raises:
        Any exception raised by ``fetch_events_page`` (for example an HTTP
        error) propagates to the caller.
    """
    all_events = []
    cursor = None
    page = 0
    label = status if status else "all"

    # Close the session when done so its pooled connections are released
    # instead of lingering until garbage collection.
    with get_session() as session:
        while True:
            page += 1
            data = fetch_events_page(session, status, cursor)
            events = data.get("events", [])

            if not events:
                break

            # Add status to each event for tracking
            for event in events:
                event["scrape_status"] = label

            all_events.extend(events)
            print(f"  [{label}] Page {page}: fetched {len(events)} events (total: {len(all_events)})")

            cursor = data.get("cursor")
            if not cursor:
                break

            # Small delay to be nice to the API
            time.sleep(0.1)

    return all_events

def scrape_all_events() -> list:
    """Collect events for the "open", "closed" and "settled" statuses.

    Each status is fetched in its own worker thread. A failure for one
    status is printed and skipped. If nothing was collected at all, a
    single unfiltered fetch is attempted as a fallback.

    Returns:
        The combined list of events (possibly empty), in thread completion
        order.
    """
    divider = "-" * 50
    collected = []

    print(f"Starting scrape at {datetime.now().isoformat()}")
    print(divider)

    # One worker per status filter; results are merged as each one finishes.
    with ThreadPoolExecutor(max_workers=3) as pool:
        pending = {}
        for wanted in ("open", "closed", "settled"):
            pending[pool.submit(fetch_all_events_for_status, wanted)] = wanted

        for finished in as_completed(pending):
            wanted = pending[finished]
            try:
                batch = finished.result()
                collected.extend(batch)
                print(f"  [{wanted}] Complete: {len(batch)} events")
            except Exception as e:
                print(f"  [{wanted}] Error: {e}")

    # Nothing came back from any filtered fetch: retry once without a filter.
    if not collected:
        print("\nStatus filters failed. Trying without status filter...")
        try:
            collected = fetch_all_events_for_status(None)
            print(f"  [all] Complete: {len(collected)} events")
        except Exception as e:
            print(f"  [all] Error: {e}")

    print(divider)
    print(f"Total events scraped: {len(collected)}")

    return collected

def events_to_csv(events: list, filename: str):
    """Write the selected event fields to ``filename`` as UTF-8 CSV.

    Only the columns listed below are written; other keys on an event are
    ignored, and missing keys produce empty cells. When ``events`` is
    empty, a message is printed and no file is created.

    Args:
        events: Event dicts to export.
        filename: Destination path; an existing file is overwritten.
    """
    if events:
        # Columns to export, in output order.
        header = [
            "event_ticker",
            "series_ticker",
            "title",
            "sub_title",
            "category",
            "mutually_exclusive",
            "available_on_brokers",
            "collateral_return_type",
            "strike_period",
            "scrape_status",
        ]

        with open(filename, "w", newline="", encoding="utf-8") as out:
            sink = csv.DictWriter(out, fieldnames=header, extrasaction="ignore")
            sink.writeheader()
            for record in events:
                sink.writerow(record)

        print(f"Saved {len(events)} events to {filename}")
    else:
        print("No events to save.")

def _print_summary(events: list, field: str, heading: str) -> None:
    """Print ``heading`` followed by event counts per ``field`` value, most frequent first."""
    from collections import Counter  # local import keeps this block self-contained

    print("\n" + "=" * 50)
    print(heading)
    print("=" * 50)

    # Events lacking the field are grouped under "Unknown". most_common()
    # orders by descending count and keeps first-seen order for ties.
    counts = Counter(event.get(field, "Unknown") for event in events)
    for value, count in counts.most_common():
        print(f"  {value}: {count}")


def main() -> str:
    """Scrape all events, save them to a timestamped CSV, and print summaries.

    Returns:
        The CSV filename that was generated for this run. The name is
        returned even when no events were scraped, in which case
        ``events_to_csv`` does not create the file.
    """
    # Scrape all events
    events = scrape_all_events()

    # Save to CSV with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"kalshi_events_{timestamp}.csv"
    events_to_csv(events, filename)

    _print_summary(events, "category", "SUMMARY BY CATEGORY")
    _print_summary(events, "scrape_status", "SUMMARY BY STATUS")

    return filename

# Script entry point: run the scraper and report the filename main() returns.
# NOTE: main() returns the filename even when nothing was scraped, and
# events_to_csv writes no file for an empty list, so "Done!" does not by
# itself confirm that the CSV exists.
if __name__ == "__main__":
    output_file = main()
    print(f"\nDone! Output: {output_file}")

Starting scrape at 2026-02-10T19:52:32.779852
--------------------------------------------------
  [open] Error: 400 Client Error: Bad Request for url: https://api.elections.kalshi.com/trade-api/v2/events?limit=1000&status=open
  [closed] Error: 400 Client Error: Bad Request for url: https://api.elections.kalshi.com/trade-api/v2/events?limit=1000&status=closed
--------------------------------------------------
Total events scraped: 0
No events to save.

SUMMARY BY CATEGORY

SUMMARY BY STATUS

Done! Output: market_data/kalshi_events_20260210_195232.csv
