forked from pola-rs/polars
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_db_benchmark.py
45 lines (41 loc) · 1.47 KB
/
test_db_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import polars as pl
import io
# Small groupby query script: load a 19-row CSV sample from memory, cast the
# id1-id3 columns to categoricals, then run question q1 through the lazy API.

# The sample is a bytes literal so it can be wrapped in an in-memory binary
# buffer and handed to the CSV reader without touching the filesystem.
csv = rb"""id1,id2,id3,id4,id5,id6,v1,v2,v3
id046,id007,id0000043878,51,10,59276,1,2,9.33179
id041,id026,id0000068300,12,58,78315,4,2,24.555835
id036,id078,id0000012244,25,9,27300,4,15,15.146486
id067,id100,id0000006157,54,38,65416,2,8,68.837472
id047,id025,id0000029319,72,92,19046,4,9,96.766937
id003,id045,id0000068931,87,74,60479,3,6,46.007797
id052,id046,id0000011793,6,32,90599,4,13,14.736708
id024,id050,id0000033725,89,85,8657,3,7,0.185677
id051,id099,id0000018331,12,26,19634,5,6,71.424675
id048,id094,id0000033175,76,23,38595,5,10,76.893685
id090,id059,id0000052627,1,62,20177,3,7,30.0487
id037,id097,id0000062401,48,8,53992,5,15,83.565443
id073,id081,id0000017280,54,90,28480,5,4,17.078693
id081,id073,id0000073423,51,22,39788,2,12,45.883758
id062,id080,id0000092749,1,75,67857,3,10,80.418674
id045,id031,id0000076210,2,42,80312,4,5,48.668692
id082,id048,id0000080227,56,62,16760,3,11,34.933239
id035,id032,id0000033279,55,13,80560,5,5,61.372678
id053,id013,id0000073898,61,63,12387,4,7,29.949863"""
f = io.BytesIO(csv)

# Explicit dtypes: five columns are read as Int32 and v3 as Float64.
# id1-id3 get no override here; they are cast after loading instead.
_int32_columns = ("id4", "id5", "id6", "v1", "v2")
_dtype_overrides = {_name: pl.Int32 for _name in _int32_columns}
_dtype_overrides["v3"] = pl.Float64
x = pl.read_csv(f, dtype=_dtype_overrides)

# Replace each of the three id columns, in order, with its categorical cast.
for _key in ("id1", "id2", "id3"):
    x[_key] = x[_key].cast(pl.Categorical)

# Everything from here on goes through the lazy API.
x = x.lazy()

question = "sum v1 by id1"  # q1
_grouped = x.groupby("id1")
_query = _grouped.agg(pl.sum("v1"))
ans = _query.collect()

# Only the result's shape is printed.
print(ans.shape, flush=True)