In [1]:
# Import libraries
import duckdb
from pathlib import Path
import pandas as pd

In [2]:
# Configuration: where the trade parquet files live and the recursive glob that matches them
DATA_DIR = Path("../data/trades")
PARQUET_GLOB = str(DATA_DIR.joinpath("**", "*.parquet"))

# Echo the resolved location and pattern so a wrong working directory is obvious
print(
    f"Looking for parquet files in: {DATA_DIR.absolute()}",
    f"Glob pattern: {PARQUET_GLOB}",
    sep="\n",
)

Looking for parquet files in: c:\Users\User\Desktop\VibeCoding\PolyMarketScrapping\notebooks\..\data\trades
Glob pattern: ..\data\trades\**\*.parquet


In [3]:
# Open an in-memory DuckDB session; both the parquet trades and the MarketDim
# database are reached through this single connection.
conn = duckdb.connect(":memory:")

# The MarketDim database file is attached read-only under the alias "marketdb"
db_path = Path("../../PolyMarketData/polymarket.duckdb")
attach_sql = f"ATTACH '{db_path}' AS marketdb (READ_ONLY)"
conn.execute(attach_sql)

# Expose every parquet file matched by PARQUET_GLOB as the "trades" view
# (read_parquet is called with hive_partitioning=true)
create_view_sql = f"""
    CREATE VIEW trades AS 
    SELECT * FROM read_parquet('{PARQUET_GLOB}', hive_partitioning=true)
"""
conn.execute(create_view_sql)

print(
    "Trades view created successfully",
    f"MarketDim database attached from: {db_path.absolute()}",
    sep="\n",
)

Trades view created successfully
MarketDim database attached from: c:\Users\User\Desktop\VibeCoding\PolyMarketScrapping\notebooks\..\..\PolyMarketData\polymarket.duckdb


## Schema & Basic Stats

In [4]:
# Column names and types of the trades view
schema_sql = "DESCRIBE trades"
conn.execute(schema_sql).fetchdf()

Unnamed: 0,column_name,column_type,null,key,default,extra
0,proxyWallet,VARCHAR,YES,,,
1,side,VARCHAR,YES,,,
2,price,DOUBLE,YES,,,
3,size,DOUBLE,YES,,,
4,conditionId,VARCHAR,YES,,,
5,timestamp,BIGINT,YES,,,
6,transactionHash,VARCHAR,YES,,,
7,dt,DATE,YES,,,


In [9]:
# Headline totals over the trades view.
# Note: total_trades counts DISTINCT transactionHash values (not rows),
# unique_wallets counts DISTINCT proxyWallet values, and total_size_millions
# is sum(size) divided by 1,000,000.
totals_sql = (
    "SELECT COUNT(distinct transactionHash) as total_trades, count(distinct proxyWallet) as unique_wallets, "
    "   sum(size)/1000000 as total_size_millions"
    " FROM trades"
)
conn.execute(totals_sql).fetchdf()

Unnamed: 0,total_trades,unique_wallets,total_size_millions
0,20182910,839541,11651.182272


In [10]:
# Peek at ten rows of the trades view (no ORDER BY, so the rows are whatever DuckDB returns first)
trades_sample_sql = "SELECT * FROM trades LIMIT 10"
conn.execute(trades_sample_sql).fetchdf()

Unnamed: 0,proxyWallet,side,price,size,conditionId,timestamp,transactionHash,dt
0,0x260fa6499b0d5f2ed927a9b7075104e5f04fdfff,SELL,0.999,5.0,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765400072,0x74d2a11c5834d038b81d50237f6f0d198a8bbd4bc414...,2025-12-13
1,0xf470a6bfc43aaf884227092a3d95bd3788ca0994,SELL,0.999,7.91,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765398826,0x5241f1252983e2bab9919401e6dc348aef56c0ed318a...,2025-12-13
2,0x97899954ea216c7b9887541723f57a60e3e82cb7,SELL,0.999,10.0,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765396594,0x2d5bc4a67909ca100525ebde7ccb8ed6efd78f0c5199...,2025-12-13
3,0xfef9e605139d3785c4097c35e0ddd7e032e2a843,SELL,0.999,6.18,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393442,0x8f951bc698d8bafc82b1d8e79beb84ea0abc228e8420...,2025-12-13
4,0x2f633efb75256a2f2445110c8978684ab8936643,SELL,0.999,242.93,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393398,0xc23fd133f7ff6f698a0e4eb157062ed8ec5b63c39bc2...,2025-12-13
5,0x34134e7c29b654c4f53ec213e6f35808b3f05204,BUY,0.001,343.0,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393334,0x1a101d7ef73c1c78bd8f645c0d7b9ce7ab508c328599...,2025-12-13
6,0xd91218feb4c61d6f58c8db78a5726479a47cddfe,SELL,0.999,3.0,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393262,0x500d026aed7300b29c4350be8448314a885ea1bb3d67...,2025-12-13
7,0xf5b217ae417cc57c64ba5f3b001c9e8223fb879b,SELL,0.999,4.0,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393234,0x87280f61140f7aa26898ea9dacb6d11eb87c89f1a939...,2025-12-13
8,0x6ffb4354cbe6e0f9989e3b55564ec5fb8646a834,BUY,0.998996,5862.37,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393228,0x9053fc663870c028e12c2530e1aff576df33e6a4ccb4...,2025-12-13
9,0x8e9eedf20dfa70956d49f608a205e402d9df38e4,SELL,0.001,1442.27,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393216,0x4eb13504bbed5800a6c4812d0f6b91bf4ce6fc7014f4...,2025-12-13


In [None]:
# How many distinct trade prices exist, and the sum taken over those distinct values
distinct_price_sql = """
    SELECT 
        COUNT(DISTINCT price) as distinct_price_count,
        SUM(DISTINCT price) as sum_distinct_prices
    FROM trades
"""
conn.execute(distinct_price_sql).fetchdf()

## Explore by Date Partition

In [None]:
# Row count for each value of the dt column, listed in date order
rows_per_date_sql = """
    SELECT dt, COUNT(*) as count 
    FROM trades 
    GROUP BY dt 
    ORDER BY dt
"""
conn.execute(rows_per_date_sql).fetchdf()

## MarketDim Connection

In [11]:
# Connect directly to the MarketDim database file.
# This notebook only runs SELECT statements against the file, so the connection
# is opened read-only: no write access is requested on a database that is
# maintained elsewhere, and the mode matches the READ_ONLY attach used by `conn`.
# (Path and duckdb come from the notebook's import cell.)
db_path = Path("../../PolyMarketData/polymarket.duckdb")
db_conn = duckdb.connect(str(db_path), read_only=True)

print(f"Connected to: {db_path.absolute()}")

# Fetch the count first so the f-string below stays free of nested quoting
market_row_count = db_conn.execute("SELECT COUNT(*) FROM MarketDim").fetchone()[0]
print(f"MarketDim rows: {market_row_count:,}")

Connected to: c:\Users\User\Desktop\VibeCoding\PolyMarketScrapping\notebooks\..\..\PolyMarketData\polymarket.duckdb
MarketDim rows: 65,415


In [12]:
# First ten MarketDim rows, restricted to the identifying and status columns
market_sample_sql = (
    "SELECT market_id, external_id, question, active, start_date_iso "
    "FROM MarketDim LIMIT 10"
)
db_conn.execute(market_sample_sql).fetchdf()

Unnamed: 0,market_id,external_id,question,active,start_date_iso
0,65416,0x0002a45f7736686e98f5e6476a3d51dd48db232f4911...,Will the Palisades fire burn less than 20k acr...,True,2025-01-09 17:18:52.530250
1,65417,0x000388290ce64d1e5e98a246b96f6c3af5e731d6d0f2...,Will Nottingham Forest vs. Crystal Palace end ...,True,2024-10-11 18:05:35.111931
2,65418,0x0003c55b045243989673c96d6df12e4f6a74ad6b9b56...,Will Jets vs. Titans be the highest scoring ga...,True,2024-09-12 17:17:23.737427
3,65419,0x0004866a1cd8e94ac08e4ff562f038a8d36549cb148f...,"Solana Up or Down - July 3, 1AM ET",True,2025-07-01 15:50:30.005384
4,65420,0x0004a8999a7b83148a9bf5e5ec66495d1e4c48e6b2b5...,Will Adam Scott win the 2025 Memorial Tournament?,True,2025-05-26 16:23:14.728531
5,65421,0x000693af411390222d3c950f6cdb7b539a9a736fffd7...,Over 239.5,True,2025-03-31 00:10:39.358632
6,65422,0x0007bac99743e5596e1bada6fd3545da8a65282d6510...,Favorite(Jazz) vs Underdog(Raptors) Line: 5.5,True,2025-03-15 01:14:49.295837
7,65423,0x0007e75ef4dd9285b629ab37d40c60fe3f0a254b32d9...,Will the Dallas Mavericks finish with the wors...,True,2025-02-24 18:40:39.295617
8,65424,0x00086d72c4a9945a7c5aa100907f1e128b490e4fcb2b...,Will Daniel Brown win the 2025 Genesis Scottis...,True,2025-07-07 17:49:34.302612
9,65425,0x0008ddb7d2b82439e50a1d6045ea22187ae0423ac7ec...,Pacers vs Knicks: O/U 225.5,True,2025-05-21 00:55:59.813065


## Custom Queries

In [7]:
# Add your custom queries here
#
# Trades joined to their market row (trades.conditionId = MarketDim.external_id).
# The filter requires MarketDim columns to be populated, so a trade without a
# matching market can never pass it; the join is therefore written as an
# explicit INNER JOIN rather than a LEFT JOIN whose unmatched rows are
# immediately discarded. Filter columns are qualified with the MarketDim alias
# so the query keeps working if trades ever gains a column of the same name.
# LIMIT is applied without ORDER BY, so which 100 rows come back is not guaranteed.
query = """
    SELECT
        t.*,
        m.*
    FROM trades t
    INNER JOIN marketdb.MarketDim m ON t.conditionId = m.external_id
    WHERE m.Outcome IS NOT NULL AND m.active
    LIMIT 100
"""

conn.execute(query).fetchdf()

Unnamed: 0,proxyWallet,side,price,size,conditionId,timestamp,transactionHash,dt,market_id,external_id,event_id,question,active,end_date_iso,Outcome,start_date_iso,VolumeNum,created_at
0,0x260fa6499b0d5f2ed927a9b7075104e5f04fdfff,SELL,0.999000,5.000000,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765400072,0x74d2a11c5834d038b81d50237f6f0d198a8bbd4bc414...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
1,0xf470a6bfc43aaf884227092a3d95bd3788ca0994,SELL,0.999000,7.910000,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765398826,0x5241f1252983e2bab9919401e6dc348aef56c0ed318a...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
2,0x97899954ea216c7b9887541723f57a60e3e82cb7,SELL,0.999000,10.000000,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765396594,0x2d5bc4a67909ca100525ebde7ccb8ed6efd78f0c5199...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
3,0xfef9e605139d3785c4097c35e0ddd7e032e2a843,SELL,0.999000,6.180000,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393442,0x8f951bc698d8bafc82b1d8e79beb84ea0abc228e8420...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
4,0x2f633efb75256a2f2445110c8978684ab8936643,SELL,0.999000,242.930000,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765393398,0xc23fd133f7ff6f698a0e4eb157062ed8ec5b63c39bc2...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0x121c9825f84c2a051e1bc922e182926c24501d1e,BUY,0.028000,178.571427,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765373037,0x7c4bde144b393d7aeeb07ead09af44493da55bcaaaf2...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
96,0xb71c020f848dd67fac949b3da289a048177663a8,BUY,0.978000,2.044954,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765372599,0xb125117ee05cc5d3c063263c62157bf3ecb81c3cb498...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
97,0x2cc711083e3dac99ef2a9203cd2630e764be5536,BUY,0.027129,295.000000,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765371965,0x333788adb88849d197a26a4625214eb275585e323469...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278
98,0x92ddee7309167ab59d62c9c0a1c6c1975b78dac8,BUY,0.027113,110.650000,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,1765371777,0x6a866bd128c271c8eb20dc87333bb178b3986c89b0ee...,2025-12-13,1479,0x05af636e0989accb08334a74f69e5368e0aa28fe498f...,9227,Will 2 Fed rate cuts happen in 2025?,True,2025-12-10 12:00:00,"[""Yes"", ""No""]",2024-12-29 22:45:22.634712,3458608.464429,2025-12-13 19:27:42.642278


In [None]:
# Close connections when done.
# The notebook opens two DuckDB handles: `db_conn` (direct connection to the
# MarketDim database file) and `conn` (in-memory session with the trades view
# and the attached marketdb). Both are closed so the database file is released.
db_conn.close()
conn.close()