forked from pola-rs/polars
-
Notifications
You must be signed in to change notification settings - Fork 0
/
polars_groupby.py
27 lines (24 loc) · 1.02 KB
/
polars_groupby.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import datetime
import glob
import polars as pl
from polars.datatypes import Utf8
from cmp.utils import peak_memory
files = glob.glob("../data/1*.csv")
files.sort()
with open("../data/mem_polars.txt", "w") as mem_f:
with open("../data/polars_bench.txt", "w") as fh:
with open("../data/polars_bench_str.txt", "w") as f_str:
for fn in files:
df = pl.read_csv(fn)
df["str"] = df["str"].cast(Utf8)
t0 = datetime.datetime.now()
for _ in range(3):
res = df.groupby("groups").select("values").sum()
duration = (datetime.datetime.now() - t0) / 3
fh.write(f"{duration.microseconds}\n")
t0 = datetime.datetime.now()
for _ in range(3):
res = df.groupby("str").select("values").sum()
duration = (datetime.datetime.now() - t0) / 3
f_str.write(f"{duration.microseconds}\n")
mem_f.write(str(peak_memory()) + "\n")