In [1]:
import pandas as pd
import polars as pl
import numpy as np
import ops
import time

In [2]:
# Small float array 1.0 .. 15.0 with two entries replaced by NaN, used to
# compare NumPy's NaN-aware quantile against the `ops` implementation.
a = np.arange(1, 16, 1.0)
a[[3, 7]] = np.nan
print(a)

numpy_q20 = np.nanquantile(a, 0.2, method="linear")
print(numpy_q20)

# ops.nanquantile modifies `a` in-place; the final print shows the array
# as it looks after that call.
ops_q20 = ops.nanquantile(a, 0.2, method=ops.QuantileMethod.Linear)
print(ops_q20)
print(a)

[ 1.  2.  3. nan  5.  6.  7. nan  9. 10. 11. 12. 13. 14. 15.]
3.8000000000000007
3.8000000000000007
[ 1.  2.  3.  5.  6.  7.  9. 10. 11. 12. 13. 14. 15. nan nan]


In [3]:
# Thirteen integers 1..13 together with their percentile rank
# (rank / number of rows), built in a single chained expression.
df = pd.DataFrame({"x": range(1, 14)}).assign(
    rank=lambda frame: frame["x"].rank(pct=True)
)
df

Unnamed: 0,x,rank
0,1,0.076923
1,2,0.153846
2,3,0.230769
3,4,0.307692
4,5,0.384615
5,6,0.461538
6,7,0.538462
7,8,0.615385
8,9,0.692308
9,10,0.769231


In [4]:
# Reference values: the 0.2 quantile of 1..13 under each NumPy
# interpolation method, printed in a fixed order for comparison with `ops`.
a = np.arange(1, 14)
for method in ("nearest", "lower", "higher", "midpoint", "linear"):
    print(np.quantile(a, q=0.2, method=method))

3
3
4
3.5
3.4000000000000004


In [5]:
# Same 0.2 quantile of 1..13, this time through `ops.quantile`, one call per
# QuantileMethod member in the same order as the NumPy reference cell.
a = np.arange(1, 14)
for method in (
    ops.QuantileMethod.Nearest,
    ops.QuantileMethod.Lower,
    ops.QuantileMethod.Higher,
    ops.QuantileMethod.Midpoint,
    ops.QuantileMethod.Linear,
):
    print(ops.quantile(a, q=0.2, method=method))

3.0
3.0
4.0
3.5
3.4000000000000004


In [6]:
# Benchmark input for the timing cells: N pseudo-random float64 values drawn
# uniformly from [0, 1) and scaled up to the range [0, 1_000_000).
# At N = 100_000_000 the array alone occupies about 800 MB (8 bytes per
# value); the scaling multiplication briefly needs a second buffer of the
# same size.
# The legacy global seed below pins the exact values, so the medians and
# quantiles printed by later cells are reproducible run to run.
N = 100_000_000
np.random.seed(1234)  # set seed for reproducibility
arr = np.random.rand(N) * 1_000_000
# Displaying `arr` as the cell result shows NumPy's abbreviated repr only.
arr

array([191519.45037889, 622108.77103983, 437727.73900711, ...,
       224903.00600613, 482318.49483513, 908686.57640487])

In [7]:
# Time NumPy's median over `arr`.
# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring elapsed intervals; time.time() is wall-clock time and can jump.
# The clock is stopped before anything is printed so console I/O is not
# counted as part of the computation.
t1 = time.perf_counter()
numpy_median = np.median(arr)
elapsed = time.perf_counter() - t1
print(numpy_median)
print("Numpy median time:", elapsed)

499978.27034853515
Numpy median time: 0.966604471206665


In [8]:
# Time the Polars median over `arr`. Building the Series from the NumPy
# array is deliberately kept inside the timed region, as in the original
# measurement.
# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring elapsed intervals; the clock is stopped before printing so
# console I/O is not counted.
t1 = time.perf_counter()
polars_median = pl.Series(arr).median()
elapsed = time.perf_counter() - t1
print(polars_median)
print("Polars median time:", elapsed)

499978.27034853515
Polars median time: 0.5756430625915527


In [9]:
# Time `ops.median` over `arr`.
# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring elapsed intervals; the clock is stopped before printing so
# console I/O is not counted.
# NOTE(review): ops.nanquantile is noted earlier in this notebook as
# modifying its input in place. If ops.median also reorders `arr`, every
# later timing cell runs on partially ordered data -- confirm, and time on
# a copy made outside the timed region if so.
t1 = time.perf_counter()
ops_median = ops.median(arr)
elapsed = time.perf_counter() - t1
print(ops_median)
print("Ops median time:", elapsed)

499978.27034853515
Ops median time: 0.44979119300842285


In [10]:
# Time `ops.quantile` at q=0.5 with the Midpoint method; the printed value
# can be compared against the median cells.
# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring elapsed intervals; the clock is stopped before printing so
# console I/O is not counted.
# NOTE(review): if an earlier `ops` call already reordered `arr` in place,
# this timing benefits from that work -- confirm before comparing numbers.
t1 = time.perf_counter()
ops_q50 = ops.quantile(arr, 0.5, method=ops.QuantileMethod.Midpoint)
elapsed = time.perf_counter() - t1
print(ops_q50)
print("Ops 0.5 quantile time:", elapsed)

499978.27034853515
Ops 0.5 quantile time: 0.3442199230194092


In [11]:
# Time five NumPy 0.2-quantile computations over `arr`, one per
# interpolation method, as a single combined measurement.
# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring elapsed intervals. All five results are collected first and the
# clock is stopped before anything is printed, so console I/O is not
# counted.
t1 = time.perf_counter()
numpy_quantiles = [
    np.quantile(arr, q=0.2, method=method)
    for method in ("nearest", "lower", "higher", "midpoint", "linear")
]
elapsed = time.perf_counter() - t1
for value in numpy_quantiles:
    print(value)
print("Numpy quantile time:", elapsed)

200003.6758994851
200003.66231440337
200003.6758994851
200003.66910694423
200003.67318246877
Numpy quantile time: 5.226744890213013


In [12]:
# Time five `ops.quantile` 0.2-quantile computations over `arr`, one per
# QuantileMethod member, as a single combined measurement that mirrors the
# NumPy timing cell.
# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring elapsed intervals. All five results are collected first and the
# clock is stopped before anything is printed, so console I/O is not
# counted.
# NOTE(review): if ops.quantile reorders `arr` in place, calls after the
# first operate on already-partitioned data, which would flatter this total
# relative to NumPy -- confirm.
t1 = time.perf_counter()
ops_quantiles = [
    ops.quantile(arr, q=0.2, method=method)
    for method in (
        ops.QuantileMethod.Nearest,
        ops.QuantileMethod.Lower,
        ops.QuantileMethod.Higher,
        ops.QuantileMethod.Midpoint,
        ops.QuantileMethod.Linear,
    )
]
elapsed = time.perf_counter() - t1
for value in ops_quantiles:
    print(value)
print("Ops quantile time:", elapsed)

200003.6758994851
200003.66231440337
200003.6758994851
200003.66910694423
200003.67318246877
Ops quantile time: 1.6498901844024658
