In [1]:
# Import libraries
import duckdb
from pathlib import Path
import pandas as pd

In [2]:
# Configuration
DATA_DIR = Path("../fetcher/data/trades")
PARQUET_GLOB = str(DATA_DIR / "**" / "*.parquet")

print(f"Looking for parquet files in: {DATA_DIR.absolute()}")
print(f"Glob pattern: {PARQUET_GLOB}")

Looking for parquet files in: c:\Users\User\Desktop\VibeCoding\PolyMarketScrapping\notebooks\..\fetcher\data\trades
Glob pattern: ..\fetcher\data\trades\**\*.parquet


In [3]:
# Create in-memory DuckDB connection and register parquet files as view
conn = duckdb.connect(":memory:")

conn.execute(f"""
    CREATE VIEW trades AS 
    SELECT * FROM read_parquet('{PARQUET_GLOB}', hive_partitioning=true)
""")

print("Trades view created successfully")

IOException: IO Error: No files found that match the pattern "..\fetcher\data\trades\**\*.parquet"

LINE 3:     SELECT * FROM read_parquet('..\fetcher\data\trades\**\*.parquet', hive_pa...
                          ^

## Schema & Basic Stats

In [None]:
# View schema
conn.execute("DESCRIBE trades").fetchdf()

In [None]:
# Count total records
conn.execute("SELECT COUNT(*) as total_trades FROM trades").fetchdf()

In [None]:
# Sample data
conn.execute("SELECT * FROM trades LIMIT 10").fetchdf()

## Explore by Date Partition

In [None]:
# Records by date partition (if using hive partitioning)
conn.execute("""
    SELECT dt, COUNT(*) as count 
    FROM trades 
    GROUP BY dt 
    ORDER BY dt
""").fetchdf()

## Custom Queries

In [None]:
# Add your custom queries here
query = """
    SELECT * FROM trades
    LIMIT 100
"""

conn.execute(query).fetchdf()

In [None]:
# Close connection when done
conn.close()