forked from pola-rs/polars
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pandas_groupby.py
29 lines (24 loc) · 1015 Bytes
/
pandas_groupby.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import datetime
import pandas as pd
import glob
from cmp.utils import peak_memory
# Benchmark script: for every CSV matching ../data/1*.csv, time two pandas
# group-by/sum aggregations (one keyed on "groups", one keyed on "str") and
# record the mean run time of each, plus the value reported by peak_memory().
#
# Outputs (one line per input file, in sorted file-name order):
#   ../data/python_bench.txt      mean microseconds, group-by on "groups"
#   ../data/python_bench_str.txt  mean microseconds, group-by on "str"
#   ../data/mem_pandas.txt        str(peak_memory()) after both timings

# Number of times each aggregation is run; the reported time is the mean.
N_REPEATS = 3

# timedelta // ONE_MICROSECOND yields the *total* duration in whole
# microseconds. `timedelta.microseconds` is only the sub-second component
# (0..999999) and silently drops whole seconds from any run >= 1 s.
ONE_MICROSECOND = datetime.timedelta(microseconds=1)

files = sorted(glob.glob("../data/1*.csv"))

with open("../data/mem_pandas.txt", "w") as mem_f, \
        open("../data/python_bench.txt", "w") as f, \
        open("../data/python_bench_str.txt", "w") as f_str:
    for fn in files:
        df = pd.read_csv(fn)
        df = df.astype({"str": "str"})

        # NOTE(review): the frame still contains the "str" column here; how
        # sum() treats a non-numeric column depends on the pandas version --
        # confirm this measures what is intended.
        t0 = datetime.datetime.now()
        for _ in range(N_REPEATS):
            # The result stays bound to `res` so that object lifetimes (and
            # therefore the peak-memory reading below) match the original.
            res = df.groupby("groups").sum()
        duration = (datetime.datetime.now() - t0) / N_REPEATS
        f.write(f"{duration // ONE_MICROSECOND}\n")

        # Second benchmark: group on the string key, summing only "values".
        df = df[["str", "values"]]
        t0 = datetime.datetime.now()
        for _ in range(N_REPEATS):
            res = df.groupby("str").sum()
        duration = (datetime.datetime.now() - t0) / N_REPEATS
        f_str.write(f"{duration // ONE_MICROSECOND}\n")

        mem_f.write(str(peak_memory()) + "\n")