<a href="https://colab.research.google.com/github/GryffindorafAviator/verifiable_ai_oracle_for_prediction_markets/blob/main/pm_data_fetch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only: mount Google Drive at /content/drive.
# The final cell of this notebook writes its output file under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [22]:
import httpx
import json
import time

In [23]:
# Base URL of the Polymarket Gamma API and the endpoints derived from it.
gamma_url = "https://gamma-api.polymarket.com"
gamma_markets_endpoint = f"{gamma_url}/markets"
gamma_events_endpoint = f"{gamma_url}/events"
gamma_tags_endpoint = f"{gamma_url}/tags"

# Default paging values placed into the query-parameter dicts below:
# `limit` is the page size, `offset` the starting position of a request.
limit, offset = 20, 10

In [24]:
# Query parameters selecting active, non-closed, non-archived records.
current_markets_params = dict(
    active=True,
    closed=False,
    archived=False,
    limit=limit,
    offset=offset,
)

# Same filter set under a generic name (an independent dict, not an alias).
params = dict(current_markets_params)

# Query parameters selecting closed records only; no "active" or "archived"
# filter is sent with this set.
closed_events_params = dict(
    closed=True,
    limit=limit,
    offset=offset,
)

In [25]:
def get_pm_data(endpoint, query_params=None, local_file_path=None) -> list:
    """Issue one GET request against ``endpoint`` and return the decoded JSON.

    Args:
        endpoint: URL to request.
        query_params: Optional mapping sent as the URL query string; an empty
            mapping is used when omitted.
        local_file_path: Optional path. When truthy, the decoded payload is
            also written there as indented (2-space) UTF-8 JSON.

    Returns:
        Whatever ``response.json()`` yields for the reply body.

    Raises:
        RuntimeError: The reply status code is anything other than 200.
    """
    request_params = {} if query_params is None else query_params

    reply = httpx.get(endpoint, params=request_params)
    status = reply.status_code
    if status != 200:
        raise RuntimeError(f"API error: HTTP {status}")

    payload = reply.json()

    # Optionally keep a pretty-printed copy of the payload on disk.
    if local_file_path:
        with open(local_file_path, "w", encoding="utf-8") as sink:
            json.dump(payload, sink, indent=2)

    return payload

In [None]:
# Smoke check: fetch a single page of events and print the raw payload.
print(get_pm_data(endpoint=gamma_events_endpoint, query_params=dict(limit=limit)))

In [27]:
def get_all_data(endpoint, query_params, test=False) -> list:
    """Collect every page from ``endpoint`` into a single list.

    Pages are requested with ``get_pm_data``, starting at offset 0 and
    advancing by ``query_params["limit"]`` each time. Any "offset" already in
    ``query_params`` is ignored; the caller's mapping itself is not modified.

    Args:
        endpoint: URL to page through.
        query_params: Query parameters; must contain "limit" (the page size).
        test: When truthy, stop after three full pages.

    Returns:
        All fetched records concatenated in request order.

    Raises:
        ValueError: "limit" is missing from ``query_params``.
    """
    if "limit" not in query_params:
        raise ValueError("query_params must include 'limit'")

    page_size = query_params["limit"]
    request_params = query_params.copy()
    collected = []
    position = 0
    full_pages = 0

    # In test mode the loop ends once more than two full pages were fetched.
    while not test or full_pages <= 2:
        request_params["offset"] = position
        page = get_pm_data(endpoint, request_params)

        # An empty page means the previous one was the last.
        if not page:
            break

        collected.extend(page)

        # A short page is the final one; nothing left to request.
        if len(page) < page_size:
            break

        position += page_size
        full_pages += 1

    return collected

In [None]:
# Smoke check: run the paginated fetch in test mode and print the result.
print(get_all_data(gamma_events_endpoint, query_params=dict(limit=limit), test=True))

In [28]:
def stream_data_to_file(
    endpoint,
    query_params,
    sleep_sec=0.3,
    out_file=None,
    max_retries=None,
):
    """Page through ``endpoint`` and write every record to a JSON Lines file.

    Pages are requested with ``get_pm_data``, starting at offset 0 and
    advancing by ``query_params["limit"]``. Each record is written to
    ``out_file`` as one JSON document per line, so the full result set is
    never held in memory. Progress is printed after every page.

    Args:
        endpoint: URL to page through.
        query_params: Query parameters; must contain "limit" (the page size).
            Any "offset" entry is overridden; the caller's mapping is not
            modified.
        sleep_sec: Pause, in seconds, between successful page requests.
        out_file: Destination path; opened in write mode (truncated).
        max_retries: Maximum number of consecutive failed requests to retry
            before re-raising the last error. ``None`` (default) retries
            without bound.

    Raises:
        TypeError: ``out_file`` is None.
        ValueError: "limit" is missing from ``query_params``.
        Exception: Whatever the last failed request raised, once
            ``max_retries`` consecutive failures have been exceeded.
    """
    if out_file is None:
        raise TypeError("out_file is required")
    if "limit" not in query_params:
        raise ValueError("query_params must include 'limit'")

    # The page size must be the one actually sent to the API; relying on an
    # unrelated global would desynchronise the offset from the request.
    page_size = query_params["limit"]
    params = query_params.copy()
    offset = 0
    total = 0
    failures = 0

    with open(out_file, "w", encoding="utf-8") as f:
        while True:
            params["offset"] = offset

            try:
                batch = get_pm_data(endpoint, params)
            except Exception as e:
                failures += 1
                if max_retries is not None and failures > max_retries:
                    raise
                # Surface the cause so a permanent failure is recognisable.
                print(f"retry... ({e!r})")
                time.sleep(2)
                continue

            # Only consecutive failures count towards max_retries.
            failures = 0

            if not batch:
                break

            for market in batch:
                f.write(json.dumps(market) + "\n")
                total += 1

            print(f"saved {total}")

            # A short page is the last one.
            if len(batch) < page_size:
                break

            offset += page_size
            time.sleep(sleep_sec)

    print(f"done, total={total}")

In [None]:
# Export every closed event to a JSON Lines file on the mounted Drive.
stream_data_to_file(
    endpoint=gamma_events_endpoint,
    query_params=closed_events_params,
    sleep_sec=0.3,
    out_file="/content/drive/MyDrive/AI_Oracle/all_closed_markets1.jsonl",
)