In [None]:
import json

# Parse temp.json into `a`. The context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open('temp.json') as f:
    a = json.load(f)

# Show how many top-level entries the parsed document has.
len(a)

In [None]:
import requests
import json
import time

BASE_URL = "https://gamma-api.polymarket.com/tags"
LIMIT = 300  # page size requested per call; increase if API allows
OUTPUT_FILE = "polymarket_tags.jsonl"

offset = 0
total_fetched = 0

# Page through the tags endpoint and persist every tag as one JSON object per
# line. OUTPUT_FILE is opened in "w" mode, so it is truncated on every run:
# each page MUST be written here, otherwise the file is left empty and the
# cells that read "polymarket_tags.jsonl" afterwards see no tags at all.
with open(OUTPUT_FILE, "w") as f:
    while True:
        params = {
            "limit": LIMIT,
            "offset": offset,
        }

        resp = requests.get(BASE_URL, params=params, timeout=10)
        resp.raise_for_status()  # abort on HTTP errors instead of parsing an error body

        tags = resp.json()

        # An empty page marks the end of the collection.
        if not tags:
            print("No more tags. Done.")
            break

        # Generator expression keeps the per-tag loop variable out of the
        # notebook's global namespace.
        f.writelines(json.dumps(tag) + "\n" for tag in tags)

        fetched = len(tags)
        total_fetched += fetched
        # Advance by the number of items actually returned rather than LIMIT,
        # so pagination stays correct if the server returns fewer than asked.
        offset += fetched

        print(f"Fetched {fetched} tags (total: {total_fetched})")

        # polite delay (optional but recommended)
        time.sleep(0.1)

print(f"Saved {total_fetched} tags to {OUTPUT_FILE}")

In [None]:
import json

# Reload the saved tags: one JSON document per line, blank lines ignored.
with open("polymarket_tags.jsonl") as f:
    # filter(str.strip, ...) keeps only lines that are non-empty once stripped;
    # the surviving lines are parsed untouched.
    tags = list(map(json.loads, filter(str.strip, f)))

In [None]:
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

INPUT_FILE = "polymarket_tags.jsonl"

# 1. Load labels
# `labels` and `metadata` are kept index-aligned: labels[i] is metadata[i]["label"].
labels = []
metadata = []

with open(INPUT_FILE) as f:
    for line in f:
        # Skip blank lines (e.g. a trailing newline) instead of crashing in
        # json.loads, matching how the plain reload cell reads this file.
        if not line.strip():
            continue
        obj = json.loads(line)
        labels.append(obj["label"])
        metadata.append(obj)  # keep full object if you want retrieval

# Fail early with an actionable message: with no labels there is nothing to
# embed and `embeddings.shape[1]` below could not provide a dimension.
if not labels:
    raise ValueError(
        f"No tags found in {INPUT_FILE}; run the tag download cell first."
    )

# 2. Embed labels
# normalize_embeddings=True yields unit-length vectors, so the inner product
# used by the index below equals cosine similarity.
model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = model.encode(labels, normalize_embeddings=True)

# 3. Create FAISS index
dim = embeddings.shape[1]
index = faiss.IndexFlatIP(dim)  # cosine similarity via normalized vectors
# FAISS works on float32 matrices; make the dtype explicit rather than relying
# on the encoder's default output type.
index.add(np.asarray(embeddings, dtype="float32"))

print(f"FAISS index created with {index.ntotal} vectors")


In [None]:
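# Candidate top-level category names to use as `search` queries
# (presumably; "Science" and "Finance" are the ones queried in this notebook):
# Business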
# Politics
# Science
# Technology
# Health
# Sustainability
# Travel
# Pets
# Curiosities
# Entertainment
# Weather
# Finance
# International

In [None]:
def search(query, k=5):
    """Return up to `k` tag labels most similar to `query`.

    Uses the notebook-level `model` to embed the query, the FAISS `index` to
    find nearest neighbours, and `labels` to map result positions back to
    label strings.

    Args:
        query: Text to look up.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(label, score)`` tuples in the order the index returned
        them. The list is shorter than `k` when the index holds fewer than
        `k` vectors.
    """
    q_emb = model.encode([query], normalize_embeddings=True)
    scores, idxs = index.search(q_emb, k)
    # FAISS pads missing results with index -1 when fewer than k vectors are
    # available. Without this filter `labels[-1]` would silently report the
    # last label as a (bogus) hit.
    return [
        (labels[i], score)
        for i, score in zip(idxs[0], scores[0])
        if i >= 0
    ]


# Query the index for "Science" and keep the neighbours whose similarity
# score is at least 0.5, printing each kept label with its score.
tag = "Science"
results = search(tag, k=50)

related_tags = []
for label, score in results:
    if score >= 0.5:
        print(f"{label}: {score:.4f}")
        related_tags.append(label)

In [None]:
def get_tag_id(label):
    """Look up the id of a tag by its label.

    Scans the notebook-level `metadata` list in order and returns the "id" of
    the first entry whose "label" equals `label`; returns None when no entry
    matches.
    """
    matching_ids = (entry["id"] for entry in metadata if entry["label"] == label)
    return next(matching_ids, None)


In [None]:
# Display how many labels are currently in `related_tags`, i.e. how many
# search results scored at least 0.5 in whichever search cell ran last.
len(related_tags)

In [None]:
import requests


def fetch_all_events(tag_id, limit=300):
    """Download every page of events for one tag.

    Repeatedly queries the events endpoint with ``closed=false`` and the given
    `tag_id`, advancing the offset by the number of items received so far,
    until an empty page comes back.

    Args:
        tag_id: Value sent as the ``tag_id`` query parameter.
        limit: Page size sent as the ``limit`` query parameter.

    Returns:
        A list with the items of all pages, in the order received.

    Raises:
        Whatever ``raise_for_status`` raises for a non-successful response.
    """
    endpoint = "https://gamma-api.polymarket.com/events"
    collected = []

    while True:
        # The next offset always equals the number of items gathered so far.
        query = {
            "closed": "false",
            "tag_id": tag_id,
            "limit": limit,
            "offset": len(collected),
        }
        response = requests.get(endpoint, params=query, timeout=15)
        response.raise_for_status()

        page = response.json()
        if not page:
            # Empty page: nothing left to fetch.
            return collected

        collected.extend(page)


# example usage
# NOTE(review): 12321 is a hard-coded tag id; which tag it refers to is not
# shown in this notebook — confirm before relying on the result.
# This performs live HTTP requests each time the cell runs.
events = fetch_all_events(tag_id=12321)
print(f"Fetched {len(events)} events")

In [None]:
# Gather the events of every tag whose label is semantically close to the
# query, then count the distinct events among them.
tag = "Finance"
results = search(tag, k=50)
# Keep only neighbours with a similarity score of at least 0.5.
related_tags = [label for label, score in results if score >= 0.5]

total_events = []
# Dedicated loop names: the loop must not overwrite the query string `tag`
# or the `events` variable bound by an earlier cell.
for related_tag in related_tags:
    tag_id = get_tag_id(related_tag)
    if tag_id is None:
        # Label has no entry in the tag metadata; nothing to fetch.
        continue
    tag_events = fetch_all_events(tag_id=tag_id)
    total_events.extend(tag_events)
    print(f"Tag: {related_tag} (ID: {tag_id}) - Events fetched: {len(tag_events)}")

# `total_events` may contain the same event several times when it was returned
# for more than one related tag, so the summary counts unique ids.
unique_event_ids = {event["id"] for event in total_events}

print(f"Total events fetched across related tags: {len(unique_event_ids)}")