# Populate Market Dataset (Interactive)

This notebook helps you populate `outputs/market_snapshots.jsonl` with **real market data**.

### Prerequisite
You must have your `KALSHI_ACCESS_KEY_ID` ready. This is the UUID that came with your `FebuaruyAPIKalshi.txt` key file.

In [None]:
import os
import json
import time
import sys
from pathlib import Path

# Ensure project root is in path.
# Path("..") is resolved against the current working directory, so this
# assumes the notebook runs from a folder directly below the project root
# (TODO confirm). The entry must be on sys.path before the
# `prediction_agent` imports below are executed.
PROJECT_ROOT = Path("..").resolve()
sys.path.append(str(PROJECT_ROOT))

from prediction_agent.api.kalshi_client import KalshiClient
from prediction_agent.config import OUTPUTS_DIR

In [None]:
# ── STEP 1: Set Credentials ───────────────────

# Prefer a Key ID that is already exported; otherwise ask for one and export
# it for the rest of this session (presumably KalshiClient reads it from the
# environment — verify against the client implementation).
key_id = os.environ.get("KALSHI_ACCESS_KEY_ID")

if key_id:
    print(f"Found Key ID in env: {key_id}")
else:
    print("KALSHI_ACCESS_KEY_ID not found in environment.")
    key_id = input("Please paste your Kalshi Access Key ID (UUID): ").strip()
    os.environ["KALSHI_ACCESS_KEY_ID"] = key_id
    print(f"Set Key ID: {key_id}")

print("Initializing Client...")
client = KalshiClient()

# Smoke test: fetch a single market and look at its ID. A "STUB-" prefix means
# the client returned stub records rather than live data.
try:
    markets = client.get_active_markets(limit=1)
    if not markets or not markets[0]["market_id"].startswith("STUB-"):
        print("✅ Success! Connected to real Kalshi API.")
    else:
        print("⚠️ WARNING: Client is still using STUB data. Check your Key ID.")
except Exception as exc:
    # Notebook-level boundary: report the failure instead of a raw traceback.
    print(f"❌ Error: {exc}")

In [None]:
# ── STEP 2: Populate Dataset ──────────────────
#
# Appends market records whose title has not been seen before to OUTPUT_FILE
# (one JSON object per line). Stops once TARGET_UNIQUE distinct titles are
# known or after MAX_LOOPS batches, whichever comes first.

TARGET_UNIQUE = 25
MAX_LOOPS = 10
OUTPUT_FILE = OUTPUTS_DIR / "market_snapshots.jsonl"
collected_titles = set()

# Pre-load titles already on disk so re-running this cell never duplicates
# a record.
if OUTPUT_FILE.exists():
    with open(OUTPUT_FILE, "r", encoding="utf-8") as f:
        for line in f:
            try:
                title = json.loads(line).get("title")
                # A record without a title must not count as a unique title.
                if title is not None:
                    collected_titles.add(title)
            except (ValueError, AttributeError, TypeError):
                # Best-effort load: skip blank/corrupt lines (ValueError covers
                # json.JSONDecodeError), non-object JSON values, and
                # unhashable titles -- but let KeyboardInterrupt and other
                # unexpected errors propagate.
                continue

print(f"Current dataset has {len(collected_titles)} unique titles.")

# Make sure the destination directory exists before the first append.
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)

for i in range(MAX_LOOPS):
    if len(collected_titles) >= TARGET_UNIQUE:
        print("✅ Target reached!")
        break

    print(f"Batch {i+1}: Fetching markets...")
    # NOTE(review): every batch issues the same request (no cursor/offset is
    # passed), so later batches only add records if the response changes
    # between calls -- confirm whether get_active_markets supports pagination.
    markets = client.get_active_markets(limit=100)

    new_records = []
    for m in markets:
        t = m.get("title")
        # Skip untitled markets (previously a KeyError) and known titles.
        if t is None or t in collected_titles:
            continue
        collected_titles.add(t)
        new_records.append(m)

    if new_records:
        print(f"  -> Found {len(new_records)} new unique markets.")
        with open(OUTPUT_FILE, "a", encoding="utf-8") as f:
            f.writelines(json.dumps(r) + "\n" for r in new_records)
    else:
        print("  -> No new markets in this batch.")

    # Pause between batches to stay gentle on the API.
    time.sleep(1)

print(f"Done. Total unique titles: {len(collected_titles)}")