In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import scienceplots
from lobib import DataLoader
from lobib.utils import pl_select

import qr.estimations as est

# Apply the "science", "grid" and "no-latex" matplotlib style sheets
# (presumably registered by the `scienceplots` import above — confirm).
plt.style.use(["science", "grid", "no-latex"])
# Instantiate lobib's DataLoader with its default arguments.
loader = DataLoader()

In [2]:
# Load every daily-estimate parquet file of one ticker into a single DataFrame.
ticker = "PFE"
estimates_dir = Path(f"../data/{ticker}/daily_estimates")
# glob yields paths in filesystem order, which is not guaranteed to be stable;
# sorting keeps the row order of `df` reproducible across machines and runs.
files = sorted(estimates_dir.glob("*.parquet"))
if not files:
    # Fail early with an explicit message instead of an opaque reader error.
    raise FileNotFoundError(f"No parquet files found in {estimates_dir}")
df = pl.scan_parquet(files).collect()

In [7]:
# Display the loaded frame (polars truncates the rendering to the first and last rows).
df

date,imbalance,spread,total_best,event,side,queue,len,size,delta_t,q_1,q_2,q_3,q_4,delta_t_sum
date,f64,i32,i32,str,i64,i32,u32,list[i64],list[i64],list[i64],list[i64],list[i64],list[i64],i64
2023-12-14,-0.2,1,3,"""Add""",-1,-2,137,"[1, 1, … 1]","[32438, 13341, … 30171359]","[20, 27, … 15]","[23, 13, … 40]","[14, 28, … 72]","[20, 17, … 883]",23990725884
2023-12-14,-0.8,1,3,"""Trade""",-1,-1,111,"[4, 6, … 5]","[60236, 8686, … 141255056]","[34, 36, … 5]","[19, 15, … 117]","[15, 14, … 72]","[39, 33, … 101]",11243610061
2023-12-14,0.9,2,3,"""Add""",-1,-1,8,"[1, 1, … 10]","[10662, 2035, … 3846]","[4, 4, … 38]","[7, 7, … 68]","[7, 7, … 107]","[43, 43, … 47]",38406
2023-12-14,-0.4,1,3,"""Trade""",1,1,14,"[14, 1, … 1]","[3526, 89997646, … 13970]","[28, 34, … 13]","[16, 17, … 45]","[17, 25, … 58]","[21, 24, … 94]",2643926256
2023-12-14,0.6,2,0,"""Add""",-1,-1,7,"[1, 1, … 1]","[2365, 7681, … 74522]","[3, 3, … 12]","[5, 5, … 27]","[18, 18, … 6]","[7, 7, … 13]",119549
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2025-12-23,-0.9,1,4,"""Cancel""",1,2,3,"[1, 2, 1]","[103506, 2517, 580699935]","[50, 65, … 6]","[44, 45, … 39]","[22, 47, … 25]","[48, 61, … 66]",580805958
2025-12-23,-0.5,1,2,"""Cancel""",-1,-2,29,"[1, 1, … 1]","[2462889, 22689, … 435099190]","[22, 18, … 9]","[24, 26, … 31]","[41, 44, … 79]","[39, 58, … 103]",9264235318
2025-12-23,-0.1,1,1,"""Add""",1,1,84,"[1, 2, … 1]","[1474743342, 27516, … 1438]","[11, 11, … 8]","[20, 20, … 28]","[54, 54, … 30]","[35, 35, … 205]",25213735172
2025-12-23,-0.2,1,0,"""Add""",-1,-2,1,[1],[1026117827],"[4, 3]","[21, 24]","[51, 39]","[39, 71]",1026117827


In [9]:
# For each (imbalance, spread) pair, merge the per-row `delta_t` lists
# into a single list holding every value of the group.
pooled_delta_t = pl.col("delta_t").flatten()
df.group_by(["imbalance", "spread"]).agg(pooled_delta_t)

imbalance,spread,delta_t
f64,i32,list[i64]
-0.6,1,"[35863, 349283, … 1414]"
-0.9,1,"[557038730, 119845088, … 580699935]"
0.6,2,"[2365, 7681, … 1602]"
-0.6,2,"[1940632, 1647, … 240922]"
-0.5,1,"[359990833, 1302559030, … 435099190]"
…,…,…
-0.1,2,"[109773, 7191, … 22039]"
-0.3,2,"[25506377, 2392, … 10697]"
0.1,1,"[341533, 285067, … 20760]"
-1.0,1,"[283849, 610064627, … 737]"


In [10]:
# Quick look at the first rows of the loaded frame.
df.head()

In [3]:
# Collapse every column not listed as a key (e.g. `date`): one row per
# (imbalance, spread, total_best, queue, side, event) combination, holding
# the summed `len` and the summed `delta_t_sum`.
stats = df.group_by(
    ["imbalance", "spread", "total_best", "queue", "side", "event"]
).agg(
    pl.sum("len"),
    pl.sum("delta_t_sum"),
)

In [4]:
# Attach to every row the total `len` of its (imbalance, spread, total_best)
# group as `total_len_cat`, computed as a window sum so no rows are dropped.
state_total_len = pl.col("len").sum().over(["imbalance", "spread", "total_best"])
stats = stats.with_columns(state_total_len.alias("total_len_cat"))

In [5]:
# probability = len / total_len_cat, i.e. the share of each
# (queue, side, event) row within its (imbalance, spread, total_best) group.
# Rows are sorted by the key columns.
key_columns = ["imbalance", "spread", "total_best", "queue", "side", "event"]
probabilities_3d = stats.select(
    *key_columns,
    (pl.col("len") / pl.col("total_len_cat")).alias("probability"),
).sort(key_columns)

In [6]:
# Build the grid of every (imbalance, total_best, event, queue, spread, side)
# combination treated as valid, whether or not it was observed in the data.

# `unique()` gives no ordering guarantee, so sort explicitly to make the
# row order of the resulting grid reproducible.
imbalances = probabilities_3d["imbalance"].unique().sort()
total_bests = probabilities_3d["total_best"].unique().sort()

# Every (imbalance, total_best) pair; shared by both spread cases below.
state_grid = pl.DataFrame({"imbalance": imbalances}).join(
    pl.DataFrame({"total_best": total_bests}), how="cross"
)

# spread == 1: Add / Cancel / Trade on queues -2, -1, 1, 2;
# `side` is +1 for positive queue indices and -1 otherwise.
spread1_combos = (
    state_grid
    .join(pl.DataFrame({"event": ["Add", "Cancel", "Trade"]}), how="cross")
    .join(pl.DataFrame({"queue": [-2, -1, 1, 2]}), how="cross")
    .with_columns(
        spread=pl.lit(1),
        side=pl.when(pl.col("queue") > 0).then(1).otherwise(-1),
    )
    # The Python int list is inferred as Int64; cast so the schema matches spread2_combos.
    .with_columns(pl.col("queue").cast(pl.Int32))
)

# spread == 2: only Create_Ask (side +1) / Create_Bid (side -1), with queue fixed to 0.
spread2_combos = (
    state_grid
    .join(
        pl.DataFrame(
            {
                "event": ["Create_Ask", "Create_Bid"],
                "side": [1, -1],
            }
        ),
        how="cross",
    )
    .with_columns(
        spread=pl.lit(2),
        queue=pl.lit(0),
    )
    # Reorder columns and cast `side` so both frames share one schema for the concat.
    .select("imbalance", "total_best", "event", "queue", "spread", pl.col("side").cast(pl.Int32))
)

# NOTE(review): `side` is Int32 here but Int64 in `probabilities_3d` (per the
# displayed schemas) — confirm the dtypes are aligned before joining the two.
valid_combos = pl.concat([spread1_combos, spread2_combos])

In [7]:
# Display the combined grid of spread-1 and spread-2 combinations.
valid_combos

imbalance,total_best,event,queue,spread,side
f64,i32,str,i32,i32,i32
-1.0,0,"""Add""",-2,1,-1
-1.0,0,"""Add""",-1,1,-1
-1.0,0,"""Add""",1,1,1
-1.0,0,"""Add""",2,1,1
-1.0,0,"""Cancel""",-2,1,-1
…,…,…,…,…,…
1.0,2,"""Create_Bid""",0,2,-1
1.0,3,"""Create_Ask""",0,2,1
1.0,3,"""Create_Bid""",0,2,-1
1.0,4,"""Create_Ask""",0,2,1


In [8]:
# Display the event probabilities per (imbalance, spread, total_best) group.
probabilities_3d

imbalance,spread,total_best,queue,side,event,probability
f64,i32,i32,i32,i64,str,f64
-1.0,1,1,-2,-1,"""Add""",0.028181
-1.0,1,1,-2,-1,"""Cancel""",0.04162
-1.0,1,1,-1,-1,"""Add""",0.128087
-1.0,1,1,-1,-1,"""Cancel""",0.034034
-1.0,1,1,-1,-1,"""Trade""",0.075994
…,…,…,…,…,…,…
1.0,2,4,1,1,"""Add""",0.077948
1.0,2,4,1,1,"""Cancel""",0.044254
1.0,2,4,1,1,"""Trade""",0.00528
1.0,2,4,2,1,"""Add""",0.009806
