In [1]:
import pandas as pd
import polars as pl
import numpy as np
import ops
import time

In [2]:
# Benchmark input: N uniform draws stretched onto [0, 1_000_000), kept both as a
# raw NumPy array (`arr`) and as a single-column pandas frame (`df`).
N = 10_000_000
np.random.seed(1234)  # pin the global RNG so every run produces the same samples
arr = 1_000_000 * np.random.rand(N)
df = pd.DataFrame(data={"x": arr})
df

Unnamed: 0,x
0,191519.450379
1,622108.771040
2,437727.739007
3,785358.583714
4,779975.808119
...,...
9999995,111155.366592
9999996,496823.104737
9999997,194689.621095
9999998,820116.286550


In [3]:
# Parameters shared by every rolling-quantile run in this notebook.
q = 0.2  # quantile level requested from each implementation
window = 100  # rolling window size passed to each implementation
methods = [
    "nearest",
    "lower",
    "higher",
    "midpoint",
    "linear",
]  # interpolation modes to benchmark, one result column per mode

In [4]:
# Benchmark pandas: one rolling quantile per interpolation method, with each
# result stored on `df` as the column `pd_q{q}_{method}`.
# The interval is measured with time.perf_counter(), a monotonic high-resolution
# clock; time.time() follows the wall clock and can be skewed by clock adjustments.
t1 = time.perf_counter()
for method in methods:
    df[f"pd_q{q}_{method}"] = (
        df["x"].rolling(window=window).quantile(q, interpolation=method)
    )
# The measured span includes assigning each result column back onto `df`.
print("Pandas rolling quantile time:", time.perf_counter() - t1)
df

Pandas rolling quantile time: 11.816467761993408


Unnamed: 0,x,pd_q0.2_nearest,pd_q0.2_lower,pd_q0.2_higher,pd_q0.2_midpoint,pd_q0.2_linear
0,191519.450379,,,,,
1,622108.771040,,,,,
2,437727.739007,,,,,
3,785358.583714,,,,,
4,779975.808119,,,,,
...,...,...,...,...,...,...
9999995,111155.366592,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640
9999996,496823.104737,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640
9999997,194689.621095,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640
9999998,820116.286550,275551.196405,252467.256543,275551.196405,264009.226474,270934.408433


In [5]:
# Benchmark ops.rolling_quantile on the raw NumPy array `arr`, storing each
# result on `df` as the column `q{q}_{method}` (no `pd_` prefix).
# The interval is measured with time.perf_counter(), a monotonic high-resolution
# clock; time.time() follows the wall clock and can be skewed by clock adjustments.
t1 = time.perf_counter()
for method in methods:
    df[f"q{q}_{method}"] = ops.rolling_quantile(
        arr,
        window=window,
        q=q,
        # Look up the enum member by capitalised name, e.g. "nearest" -> QuantileMethod.Nearest.
        method=getattr(ops.QuantileMethod, method.capitalize()),
    )
# The measured span includes assigning each result column back onto `df`.
print("Ops rolling quantile time:", time.perf_counter() - t1)
df

Ops rolling quantile time: 1.3570387363433838


Unnamed: 0,x,pd_q0.2_nearest,pd_q0.2_lower,pd_q0.2_higher,pd_q0.2_midpoint,pd_q0.2_linear,q0.2_nearest,q0.2_lower,q0.2_higher,q0.2_midpoint,q0.2_linear
0,191519.450379,,,,,,,,,,
1,622108.771040,,,,,,,,,,
2,437727.739007,,,,,,,,,,
3,785358.583714,,,,,,,,,,
4,779975.808119,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
9999995,111155.366592,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640
9999996,496823.104737,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640
9999997,194689.621095,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640,252467.256543,224727.882025,252467.256543,238597.569284,246919.381640
9999998,820116.286550,275551.196405,252467.256543,275551.196405,264009.226474,270934.408433,275551.196405,252467.256543,275551.196405,264009.226474,270934.408433


In [6]:
# Check every ops result column against its pandas counterpart.
# Series.equals requires exact element-wise equality and treats NaNs in the
# same positions as equal, so the leading NaN rows do not cause a mismatch.
for method in methods:
    pandas_col = f"pd_q{q}_{method}"
    ops_col = f"q{q}_{method}"
    matches = df[pandas_col].equals(df[ops_col])
    print(f"Method: {method}, Equals: {matches}")

Method: nearest, Equals: True
Method: lower, Equals: True
Method: higher, Equals: True
Method: midpoint, Equals: True
Method: linear, Equals: True


In [7]:
# Benchmark polars: build the frame first so construction is excluded from the
# timing, then add one `pl_q{q}_{method}` column per interpolation method.
pldf = pl.DataFrame({"x": arr})
# The interval is measured with time.perf_counter(), a monotonic high-resolution
# clock; time.time() follows the wall clock and can be skewed by clock adjustments.
t1 = time.perf_counter()
pldf = pldf.with_columns(
    [
        pldf["x"]
        .rolling_quantile(window_size=window, quantile=q, interpolation=method)
        .alias(f"pl_q{q}_{method}")
        for method in methods
    ]
)
print("Polars rolling quantile time:", time.perf_counter() - t1)
pldf

Polars rolling quantile time: 2.2604880332946777


x,pl_q0.2_nearest,pl_q0.2_lower,pl_q0.2_higher,pl_q0.2_midpoint,pl_q0.2_linear
f64,f64,f64,f64,f64,f64
191519.450379,,,,,
622108.77104,,,,,
437727.739007,,,,,
785358.583714,,,,,
779975.808119,,,,,
…,…,…,…,…,…
111155.366592,252467.256543,224727.882025,252467.256543,238597.569284,246919.38164
496823.104737,252467.256543,224727.882025,252467.256543,238597.569284,246919.38164
194689.621095,252467.256543,224727.882025,252467.256543,238597.569284,246919.38164
820116.28655,275551.196405,252467.256543,275551.196405,264009.226474,270934.408433
