# VizFlow Demo - v0.4.1

Demonstrates **column mapping** and **schema evolution** on ylin `.meords` trade data, followed by timestamp parsing, binning, and aggregation of the same frame.

In [14]:
import polars as pl
import vizflow as vf
from pathlib import Path

## 1. Schema Evolution + Column Mapping

Set `column_preset="ylin"` to auto-rename 52 columns.  
Set `trade_schema` to cast `order_qty` from Float64 to Int64.

In [15]:
# Per-column overrides applied on top of the preset: order_qty is re-typed to Int64.
trade_schema_overrides = {"order_qty": vf.ColumnSchema(cast_to=pl.Int64)}

# Describe where the trade files live and how their columns should be treated,
# then register the configuration so the vf.* calls below can use it.
config = vf.Config(
    trade_dir=Path("data/ylin/trade"),
    trade_pattern="{date}.meords",
    column_preset="ylin",
    market="CN",
    trade_schema=trade_schema_overrides,
)
vf.set_config(config)

# NOTE(review): "11110101" is presumably the value substituted for {date} in
# trade_pattern — confirm against vf.scan_trade.
df = vf.scan_trade("11110101")

# Only the first five rows of three columns are materialized for the preview.
qty_preview = df.select(["ukey", "order_side", "order_qty"]).head(5)

print("=== Schema Evolution: order_qty (Float64 → Int64) ===")
print(qty_preview.collect())

=== Schema Evolution: order_qty (Float64 → Int64) ===
shape: (5, 3)
┌──────────┬────────────┬───────────┐
│ ukey     ┆ order_side ┆ order_qty │
│ ---      ┆ ---        ┆ ---       │
│ i64      ┆ str        ┆ i64       │
╞══════════╪════════════╪═══════════╡
│ 11000408 ┆ Sell       ┆ 2900      │
│ 11002405 ┆ Buy        ┆ 27100     │
│ 11000739 ┆ Sell       ┆ 400       │
│ 11002709 ┆ Sell       ┆ 600       │
│ 11300436 ┆ Sell       ┆ 14000     │
└──────────┴────────────┴───────────┘


In [16]:
# Columns shown in the preview, using the names produced by the preset.
sample_columns = ["ukey", "order_side", "order_qty", "bid_px0", "ask_px0", "timestamp"]

print("=== Column Mapping: ylin preset ===")
# Column names are read from the schema, so no rows are collected for the count.
cols = df.collect_schema().names()
# The trailing "\n" yields the same blank separator line as a bare print().
print(f"Total columns: {len(cols)}\n")
print("Sample data with standard column names:")
sample_preview = df.select(sample_columns).head(5)
print(sample_preview.collect())

=== Column Mapping: ylin preset ===
Total columns: 89

Sample data with standard column names:
shape: (5, 6)
┌──────────┬────────────┬───────────┬─────────┬─────────┬───────────┐
│ ukey     ┆ order_side ┆ order_qty ┆ bid_px0 ┆ ask_px0 ┆ timestamp │
│ ---      ┆ ---        ┆ ---       ┆ ---     ┆ ---     ┆ ---       │
│ i64      ┆ str        ┆ i64       ┆ f64     ┆ f64     ┆ i64       │
╞══════════╪════════════╪═══════════╪═════════╪═════════╪═══════════╡
│ 11000408 ┆ Sell       ┆ 2900      ┆ 22.48   ┆ 22.5    ┆ 93000000  │
│ 11002405 ┆ Buy        ┆ 27100     ┆ 7.19    ┆ 7.2     ┆ 93000000  │
│ 11000739 ┆ Sell       ┆ 400       ┆ 14.97   ┆ 14.98   ┆ 93000000  │
│ 11002709 ┆ Sell       ┆ 600       ┆ 14.7    ┆ 14.71   ┆ 93000000  │
│ 11300436 ┆ Sell       ┆ 14000     ┆ 20.3    ┆ 20.31   ┆ 93000010  │
└──────────┴────────────┴───────────┴─────────┴─────────┴───────────┘


## 2. Parse Timestamps

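Convert the integer `timestamp` (HHMMSSMMM: hours, minutes, seconds, milliseconds — e.g. `93000000` is 09:30:00.000) to `tod_timestamp` (time of day, `pl.Time`) and `elapsed_timestamp` (milliseconds; in the output below 09:30:00 maps to 0).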

In [4]:
# Derive the time-of-day and elapsed-time columns from the raw integer timestamp.
# NOTE(review): this rebinds `df`, so re-running the cell passes the already
# parsed frame back into parse_time — confirm that is harmless.
df = vf.parse_time(df, timestamp_col="timestamp")

# Show the raw timestamp next to both derived columns for the first six rows.
time_columns = ["ukey", "timestamp", "tod_timestamp", "elapsed_timestamp"]
time_preview = df.select(time_columns).head(6)

print("=== Parse Timestamps ===")
print(time_preview.collect())

=== Parse Timestamps ===
shape: (6, 4)
┌──────────┬───────────┬───────────────┬───────────────────┐
│ ukey     ┆ timestamp ┆ tod_timestamp ┆ elapsed_timestamp │
│ ---      ┆ ---       ┆ ---           ┆ ---               │
│ i64      ┆ i64       ┆ time          ┆ i64               │
╞══════════╪═══════════╪═══════════════╪═══════════════════╡
│ 11000408 ┆ 93000000  ┆ 09:30:00      ┆ 0                 │
│ 11002405 ┆ 93000000  ┆ 09:30:00      ┆ 0                 │
│ 11000739 ┆ 93000000  ┆ 09:30:00      ┆ 0                 │
│ 11002709 ┆ 93000000  ┆ 09:30:00      ┆ 0                 │
│ 11300436 ┆ 93000010  ┆ 09:30:00.010  ┆ 10                │
│ 11301288 ┆ 93000010  ┆ 09:30:00.010  ┆ 10                │
└──────────┴───────────┴───────────────┴───────────────────┘


## 3. Binning

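Discretize `bid_px0` into bins of width 1.0, stored in a new `bid_px0_bin` column. In the output below each price lands in the nearest bin (e.g. 14.7 → 15), i.e. values are rounded rather than floored.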

In [5]:
# Column name -> bin width; the preview below reads the result from "bid_px0_bin".
bin_widths = {"bid_px0": 1.0}
# NOTE(review): `df` is rebound here as well, so the cell is not independent
# of how many times it has been run — confirm vf.bin tolerates a re-run.
df = vf.bin(df, widths=bin_widths)

# Put the original price beside its bin for the first six rows.
bin_preview = df.select(["ukey", "bid_px0", "bid_px0_bin"]).head(6)

print("=== Binning ===")
print(bin_preview.collect())

=== Binning ===
shape: (6, 3)
┌──────────┬─────────┬─────────────┐
│ ukey     ┆ bid_px0 ┆ bid_px0_bin │
│ ---      ┆ ---     ┆ ---         │
│ i64      ┆ f64     ┆ i64         │
╞══════════╪═════════╪═════════════╡
│ 11000408 ┆ 22.48   ┆ 22          │
│ 11002405 ┆ 7.19    ┆ 7           │
│ 11000739 ┆ 14.97   ┆ 15          │
│ 11002709 ┆ 14.7    ┆ 15          │
│ 11300436 ┆ 20.3    ┆ 20          │
│ 11301288 ┆ 13.78   ┆ 14          │
└──────────┴─────────┴─────────────┘


## 4. Aggregation

Group by `order_side` and compute three metrics per side: the row count, the total `order_qty`, and the mean `bid_px0`.

In [6]:
# Output column name -> Polars expression evaluated once per group.
metrics = dict(
    count=pl.len(),                        # number of rows in the group
    total_qty=pl.col("order_qty").sum(),   # summed order quantity
    avg_bid=pl.col("bid_px0").mean(),      # mean best-bid price
)

# One output row per distinct order_side value.
group_keys = ["order_side"]
agg_df = vf.aggregate(df, group_by=group_keys, metrics=metrics)

print("=== Aggregation ===")
# agg_df is only materialized here, when it is collected for display.
print(agg_df.collect())

=== Aggregation ===
shape: (2, 4)
┌────────────┬───────┬───────────┬──────────┐
│ order_side ┆ count ┆ total_qty ┆ avg_bid  │
│ ---        ┆ ---   ┆ ---       ┆ ---      │
│ str        ┆ u32   ┆ i64       ┆ f64      │
╞════════════╪═══════╪═══════════╪══════════╡
│ Buy        ┆ 8     ┆ 57600     ┆ 20.94125 │
│ Sell       ┆ 20    ┆ 92500     ┆ 17.6865  │
└────────────┴───────┴───────────┴──────────┘
