In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import seaborn as sns
from datasketches import kll_ints_sketch
from lobib import DataLoader
from lobib.utils import pl_select
from tqdm import tqdm

import qr.estimations as est

sns.set_style("whitegrid")
loader = DataLoader()

In [2]:
ticker = "PFE"
files = list(Path(f"../data/{ticker}/daily_estimates").glob("*.parquet"))
df = pl.scan_parquet(files).collect()

In [3]:
stats = df.group_by("imbalance", "spread", "total_best", "queue", "side", "event").agg(
    pl.col("len").sum(), pl.col("delta_t_sum").sum()
)

In [4]:
stats = stats.with_columns(
    total_len_cat=pl.col("len").sum().over("imbalance", "spread", "total_best")
)

In [5]:
probabilities_3d = stats.select(
    "imbalance",
    "spread",
    "total_best",
    "queue",
    "side",
    "event",
    probability=pl.col("len").truediv(pl.col("total_len_cat")),
).sort("imbalance", "spread", "total_best", "queue", "side", "event")

In [6]:
imbalances = probabilities_3d["imbalance"].unique()
total_bests = probabilities_3d["total_best"].unique()

spread1_combos = (
    pl.DataFrame({"imbalance": imbalances})
    .join(pl.DataFrame({"total_best": total_bests}), how="cross")
    .join(pl.DataFrame({"event": ["Add", "Cancel", "Trade"]}), how="cross")
    .join(pl.DataFrame({"queue": [-2, -1, 1, 2]}), how="cross")
    .with_columns(
        spread=pl.lit(1),
        side=pl.when(pl.col("queue") > 0).then(1).otherwise(-1),
    )
)

spread2_combos = (
    pl.DataFrame({"imbalance": imbalances})
    .join(pl.DataFrame({"total_best": total_bests}), how="cross")
    .join(
        pl.DataFrame(
            {
                "event": ["Create_Ask", "Create_Bid"],
                "side": [1, -1],
            }
        ),
        how="cross",
    )
    .with_columns(
        spread=pl.lit(2),
        queue=pl.lit(0),
    )
)

valid_combos = pl.concat([spread1_combos, spread2_combos])

ShapeError: unable to vstack, column names don't match: "queue" and "side"