In [7]:
import duckdb
import polars as pl
import os

from quant101.core_2.config import data_dir

# Scan every parquet file under the daily-aggregates lake and list tickers whose
# overall highest high and lowest low both fall inside narrow price bands.
# Earlier experiments pointed at other inputs, e.g. one month of minute bars
# ("../data/lake/us_stocks_sip/minute_aggs_v1/2025/07/*.parquet").
#
# NOTE: this rebinds the imported `data_dir` (the base directory) to the parquet
# glob. The name is kept for compatibility; the import above restores the base
# value each time the cell runs.
data_dir = os.path.join(data_dir, "lake/us_stocks_sip/day_aggs_v1/*/*/*.parquet")

# Per-ticker aggregates; HAVING filters on the aggregated high/low.
# To inspect a single symbol instead, add e.g. `WHERE ticker = 'TSE'` before GROUP BY.
# NOTE(review): the high/low columns come back as float32, so a price sitting
# exactly on a band edge may not compare as expected — confirm if edges matter.
query = f"""
SELECT
    ticker,
    MAX(high) as max_high,
    MIN(low) as min_low,
    SUM(volume) AS total_volume,
    SUM(volume * close) AS turnover
FROM '{data_dir}'
GROUP BY ticker
HAVING MAX(high) BETWEEN 37.20 AND 37.30
    AND MIN(low) BETWEEN 13.20 AND 13.60
"""

# The context manager closes the connection once the result has been
# materialized as a Polars DataFrame, instead of leaving it open in the kernel.
with duckdb.connect() as con:
    result = con.sql(query).pl()

# Option 1: widen the Polars display limits so the whole table is printed.
with pl.Config(tbl_rows=100, tbl_cols=10, fmt_str_lengths=50):
    print(result)

# Measurements recorded by the author (presumably data size on disk and the
# runtime of this query — TODO confirm):
# zstd level 3  653MB 18-20s
# zstd level 10 631MB 18-20s

shape: (3, 5)
┌────────┬───────────┬─────────┬───────────────┬──────────┐
│ ticker ┆ max_high  ┆ min_low ┆ total_volume  ┆ turnover │
│ ---    ┆ ---       ┆ ---     ┆ ---           ┆ ---      │
│ str    ┆ f32       ┆ f32     ┆ decimal[38,0] ┆ f64      │
╞════════╪═══════════╪═════════╪═══════════════╪══════════╡
│ ICBK   ┆ 37.299999 ┆ 13.55   ┆ 18947471      ┆ 4.4948e8 │
│ QUBX   ┆ 37.240002 ┆ 13.4181 ┆ 10218798      ┆ 2.2291e8 │
│ FTXO   ┆ 37.200001 ┆ 13.37   ┆ 278790057     ┆ 7.5961e9 │
└────────┴───────────┴─────────┴───────────────┴──────────┘


In [8]:
import duckdb
import polars as pl

# Read the stock-splits parquet file and show the rows for a single ticker.
splits_dir = '../data/raw/us_stocks_sip/splits/splits.parquet'

# Option 1 (pure Polars, not used here):
#   pl.read_parquet(splits_dir).filter(pl.col('ticker') == 'ENVX')

# Option 2 (DuckDB): query the parquet file directly with SQL.
query = f"""
SELECT * FROM '{splits_dir}'
WHERE ticker ='ENVX'
"""

# The context manager closes the connection once the result has been
# materialized as a Polars DataFrame, instead of leaving it open in the kernel.
with duckdb.connect() as con:
    result = con.sql(query).pl()

# Raise the display limits so long string values (such as the id column) and
# all columns are shown.
with pl.Config(tbl_rows=20, tbl_cols=20, fmt_str_lengths=100):
    print(result)


shape: (1, 5)
┌────────────────────────────────────────────────┬────────────────┬────────────┬──────────┬────────┐
│ id                                             ┆ execution_date ┆ split_from ┆ split_to ┆ ticker │
│ ---                                            ┆ ---            ┆ ---        ┆ ---      ┆ ---    │
│ str                                            ┆ str            ┆ f64        ┆ f64      ┆ str    │
╞════════════════════════════════════════════════╪════════════════╪════════════╪══════════╪════════╡
│ E1a300d5b5f47a368890f04f9508c697580362a81d7411 ┆ 2025-07-17     ┆ 7.0        ┆ 8.0      ┆ ENVX   │
│ 50703db9e488cd95625                            ┆                ┆            ┆          ┆        │
└────────────────────────────────────────────────┴────────────────┴────────────┴──────────┴────────┘


In [12]:
import duckdb
import polars as pl
import os

from quant101.core_2.config import data_dir

# Pull every daily-aggregate row for one ticker from the parquet lake.
# Earlier experiments pointed at other inputs, e.g. one month of minute bars
# ("../data/lake/us_stocks_sip/minute_aggs_v1/2025/07/*.parquet").
#
# NOTE: this rebinds the imported `data_dir` (the base directory) to the parquet
# glob. The name is kept for compatibility; the import above restores the base
# value each time the cell runs.
data_dir = os.path.join(data_dir, "lake/us_stocks_sip/day_aggs_v1/*/*/*.parquet")

query = f"""
SELECT
    *
FROM '{data_dir}'
WHERE ticker = 'ENVX'
"""

# The context manager closes the connection once the result has been
# materialized as a Polars DataFrame, instead of leaving it open in the kernel.
with duckdb.connect() as con:
    result = con.sql(query).pl()

# Option 1: widen the Polars display limits (up to 100 rows are printed).
with pl.Config(tbl_rows=100, tbl_cols=10, fmt_str_lengths=50):
    print(result)

shape: (1_041, 8)
┌────────┬──────────┬───────────┬───────────┬───────────┬───────────┬───────────────┬──────────────┐
│ ticker ┆ volume   ┆ open      ┆ close     ┆ high      ┆ low       ┆ window_start  ┆ transactions │
│ ---    ┆ ---      ┆ ---       ┆ ---       ┆ ---       ┆ ---       ┆ ---           ┆ ---          │
│ str    ┆ u64      ┆ f32       ┆ f32       ┆ f32       ┆ f32       ┆ i64           ┆ u32          │
╞════════╪══════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════════╪══════════════╡
│ ENVX   ┆ 877766   ┆ 21.5      ┆ 17.73     ┆ 22.0      ┆ 16.709999 ┆ 1626321600000 ┆ 4785         │
│        ┆          ┆           ┆           ┆           ┆           ┆ 000000        ┆              │
│ ENVX   ┆ 1161927  ┆ 18.870001 ┆ 17.700001 ┆ 19.8853   ┆ 17.0      ┆ 1626408000000 ┆ 7515         │
│        ┆          ┆           ┆           ┆           ┆           ┆ 000000        ┆              │
│ ENVX   ┆ 475600   ┆ 17.51     ┆ 18.559999 ┆ 18.709999 ┆ 17.1      ┆ 162

In [10]:
import polars as pl

# Load the index parquet file and add a `date` column: the epoch-millisecond
# `timestamp` converted to New York time and reset to local midnight.
spx = pl.read_parquet('../I:SPXday20150101_20250905.parquet')
spx = spx.with_columns(
    pl.from_epoch(pl.col("timestamp"), time_unit="ms")  # epoch milliseconds -> datetime
    .dt.convert_time_zone('America/New_York')
    # Also clear the sub-second part so the value is exactly midnight even if a
    # timestamp carries milliseconds.
    .dt.replace(hour=0, minute=0, second=0, microsecond=0)
    .alias('date')
)
print(spx.head())

shape: (5, 10)
┌─────────┬─────────┬─────────┬─────────┬───┬───────────────┬──────────────┬──────┬────────────────┐
│ open    ┆ high    ┆ low     ┆ close   ┆ … ┆ timestamp     ┆ transactions ┆ otc  ┆ date           │
│ ---     ┆ ---     ┆ ---     ┆ ---     ┆   ┆ ---           ┆ ---          ┆ ---  ┆ ---            │
│ f64     ┆ f64     ┆ f64     ┆ f64     ┆   ┆ i64           ┆ i64          ┆ bool ┆ datetime[ms,   │
│         ┆         ┆         ┆         ┆   ┆               ┆              ┆      ┆ America/New_Yo │
│         ┆         ┆         ┆         ┆   ┆               ┆              ┆      ┆ rk]            │
╞═════════╪═════════╪═════════╪═════════╪═══╪═══════════════╪══════════════╪══════╪════════════════╡
│ 4138.09 ┆ 4145.96 ┆ 4122.47 ┆ 4136.13 ┆ … ┆ 1676354400000 ┆ null         ┆ null ┆ 2023-02-14     │
│         ┆         ┆         ┆         ┆   ┆               ┆              ┆      ┆ 00:00:00 EST   │
│ 4119.5  ┆ 4148.11 ┆ 4103.98 ┆ 4147.6  ┆ … ┆ 1676440800000 ┆ null         ┆