In [1]:
!wget https://storage.googleapis.com/rapidsai/polars-demo/transactions.parquet -O transactions.parquet

--2025-05-08 12:04:37--  https://storage.googleapis.com/rapidsai/polars-demo/transactions.parquet
Resolving storage.googleapis.com (storage.googleapis.com)... 172.253.120.207, 74.125.140.207, 2a00:1450:400c:c08::cf, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.253.120.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4274457161 (4.0G) [application/octet-stream]
Saving to: ‘transactions.parquet’


2025-05-08 12:06:18 (40.4 MB/s) - ‘transactions.parquet’ saved [4274457161/4274457161]



In [1]:
import polars as pl
from polars.testing import assert_frame_equal

# Build a lazy scan over the downloaded Parquet file.
transactions = pl.scan_parquet(source="transactions.parquet")

# Resolve and display the column names and dtypes of the lazy frame.
transactions.collect_schema()

Schema([('CUST_ID', String),
        ('START_DATE', Date),
        ('END_DATE', Date),
        ('TRANS_ID', String),
        ('DATE', Date),
        ('YEAR', Int64),
        ('MONTH', Int64),
        ('DAY', Int64),
        ('EXP_TYPE', String),
        ('AMOUNT', Float64)])

In [2]:
# Bare expression so the notebook displays the lazy frame itself (this cell calls no .collect()).
transactions

In [3]:
# Total of the AMOUNT column, collected without an explicit engine argument.
(
    transactions
    .select(pl.sum("AMOUNT"))
    .collect()
)

AMOUNT
f64
21367000000.0


In [5]:
# Total of the AMOUNT column, collected with engine="gpu".
(
    transactions
    .select(pl.sum("AMOUNT"))
    .collect(engine="gpu")
)

AMOUNT
f64
21367000000.0


In [6]:
%%time
res_cpu = (
   transactions
   .group_by("CUST_ID")
   .agg(pl.col("AMOUNT").sum())
   .sort(by="AMOUNT", descending=True)
   .head()
   .collect()
)
res_cpu

CPU times: user 48.4 s, sys: 11.6 s, total: 1min
Wall time: 7.03 s


CUST_ID,AMOUNT
str,f64
"""CP2KXQSX9I""",2310800.0
"""CGOKEO2EH4""",2272100.0
"""CZ1KK7E2PK""",2240900.0
"""CXYJF3GWQU""",2238900.0
"""CZ0G7ZK6HA""",2211900.0


In [7]:
%%time
res_gpu = (
   transactions
   .group_by("CUST_ID")
   .agg(pl.col("AMOUNT").sum())
   .sort(by="AMOUNT", descending=True)
   .head()
   .collect(engine="gpu")
)
res_gpu

CPU times: user 1.25 s, sys: 459 ms, total: 1.71 s
Wall time: 1.17 s


CUST_ID,AMOUNT
str,f64
"""CP2KXQSX9I""",2310800.0
"""CGOKEO2EH4""",2272100.0
"""CZ1KK7E2PK""",2240900.0
"""CXYJF3GWQU""",2238900.0
"""CZ0G7ZK6HA""",2211900.0
