In [1]:
import os
# Limit Polars to one thread, presumably so its timings are comparable with
# the other implementations benchmarked in this notebook. The variable is set
# before `import polars` on purpose.
# NOTE(review): Polars is documented to read POLARS_MAX_THREADS when it is
# first imported — confirm for the installed version.
os.environ["POLARS_MAX_THREADS"] = "1"

import build.algos as algos
import time
import numpy as np
import polars as pl
# Sanity check: prints the size of Polars' thread pool (expected: 1).
print(pl.thread_pool_size())

1


In [2]:
def test_median(fn):
    arr = np.array([1.0, 3.5, 2.5, 4.0, 5.5])
    result = fn(arr)
    print("median:", result)
    print(arr)


# Compare the two printed arrays in the output: after np.median the input is
# in its original order, while after algos.median_sort it comes back sorted,
# i.e. median_sort modifies the array it is given.
test_median(np.median)
test_median(algos.median_sort)

median: 3.5
[1.  3.5 2.5 4.  5.5]
median: 3.5
[1.  2.5 3.5 4.  5.5]


In [3]:
N = 100_000_000  # number of samples; as float64 that is roughly 800 MB
np.random.seed(1234)  # set seed for reproducibility
arr = np.random.rand(N) * 1_000_000  # uniform draws from [0, 1) scaled up by 1e6
arr

array([191519.45037889, 622108.77103983, 437727.73900711, ...,
       224903.00600613, 482318.49483513, 908686.57640487])

In [4]:
# Put the same values into a single-column ("x") Polars DataFrame for the
# Polars benchmark.
pldf = pl.DataFrame({"x": arr})
pldf

x
f64
191519.450379
622108.77104
437727.739007
785358.583714
779975.808119
…
726861.776622
460858.145885
224903.006006
482318.494835


In [5]:
def time_median(arr, fn, repeat):
    """Time ``fn(arr)`` ``repeat`` times, printing one line per run.

    Each printed line has the form ``time: <seconds> result: <value>``.

    Args:
        arr: Input handed unchanged to ``fn`` on every repetition.
        fn: Callable that computes the median of ``arr``.
        repeat: Number of timed repetitions.

    Note:
        The same ``arr`` object is reused for every run. If ``fn`` reorders
        its input in place, every run after the first operates on the
        already-modified data, which can skew the timings.
    """
    for _ in range(repeat):
        # perf_counter() is monotonic and high-resolution; time.time() is a
        # wall clock that may jump when the system clock is adjusted.
        start = time.perf_counter()
        result = fn(arr)
        elapsed = time.perf_counter() - start
        print("time:", elapsed, "result:", result)


def time_median_pl(pl_col, repeat):
    """Time ``pl_col.median()`` ``repeat`` times, printing one line per run.

    Each printed line has the form ``time: <seconds> result: <value>``.

    Args:
        pl_col: Object exposing a ``median()`` method (called here with a
            Polars column).
        repeat: Number of timed repetitions.
    """
    for _ in range(repeat):
        # perf_counter() is monotonic and high-resolution; time.time() is a
        # wall clock that may jump when the system clock is adjusted.
        start = time.perf_counter()
        result = pl_col.median()
        elapsed = time.perf_counter() - start
        print("time:", elapsed, "result:", result)

In [6]:
repeat = 7
# Baseline: median of column "x" computed by Polars' own .median().
time_median_pl(pldf["x"], repeat)

time: 0.4251832962036133 result: 499978.27034853515
time: 0.43334174156188965 result: 499978.27034853515
time: 0.44308996200561523 result: 499978.27034853515
time: 0.5052855014801025 result: 499978.27034853515
time: 0.43723320960998535 result: 499978.27034853515
time: 0.5158951282501221 result: 499978.27034853515
time: 0.4508979320526123 result: 499978.27034853515


In [7]:
repeat = 7
# Baseline: NumPy's np.median on the raw array.
time_median(arr, np.median, repeat)

time: 0.9991464614868164 result: 499978.27034853515
time: 0.7064235210418701 result: 499978.27034853515
time: 0.7046847343444824 result: 499978.27034853515
time: 0.7427153587341309 result: 499978.27034853515
time: 0.7353174686431885 result: 499978.27034853515
time: 0.7162046432495117 result: 499978.27034853515
time: 0.7131748199462891 result: 499978.27034853515


In [8]:
repeat = 7
# NOTE(review): judging by its name, this is a Rust implementation built on
# select_nth_unstable — confirm in build.algos. If it works in place, runs
# after the first see an already-partitioned array.
time_median(arr, algos.median_select_nth_unstable_rust, repeat)

time: 0.4365999698638916 result: 499978.27034853515
time: 0.343212366104126 result: 499978.27034853515
time: 0.3964829444885254 result: 499978.27034853515
time: 0.3470337390899658 result: 499978.27034853515
time: 0.35475969314575195 result: 499978.27034853515
time: 0.34731316566467285 result: 499978.27034853515
time: 0.34158992767333984 result: 499978.27034853515


In [9]:
repeat = 7
# NOTE(review): presumably an nth_element-based implementation, judging by
# the name — confirm in build.algos, including whether it mutates `arr`.
time_median(arr, algos.median_nth_element, repeat)

time: 1.0078232288360596 result: 499978.27034853515
time: 1.0027246475219727 result: 499978.27034853515
time: 1.0499656200408936 result: 499978.27034853515
time: 1.0296411514282227 result: 499978.27034853515
time: 1.0591206550598145 result: 499978.27034853515
time: 1.092536449432373 result: 499978.27034853515
time: 1.0868582725524902 result: 499978.27034853515


In [10]:
repeat = 7
# NOTE(review): presumably a quickselect implementation, judging by the
# name — confirm in build.algos, including whether it mutates `arr`.
time_median(arr, algos.median_quickselect, repeat)

time: 0.844318151473999 result: 499978.27034853515
time: 0.789970874786377 result: 499978.27034853515
time: 0.8036441802978516 result: 499978.27034853515
time: 0.7459836006164551 result: 499978.27034853515
time: 0.7465386390686035 result: 499978.27034853515
time: 0.7397961616516113 result: 499978.27034853515
time: 0.737584114074707 result: 499978.27034853515
