# Aggregations, Window Functions, and UDFs 

## Simple aggregations

### How to do it...

In [55]:
import polars as pl
from polars import selectors as cs

In [56]:
# Load the sales data set; try_parse_dates asks Polars to infer date-typed
# columns while reading.
# NOTE(review): the file name is spelled 'constoso' — confirm it matches the
# file on disk before "correcting" it.
df = pl.read_csv(
    source='../data/constoso_sales.csv',
    try_parse_dates=True,
)

In [57]:
# Preview the first five rows (the default for head) to check columns and dtypes.
df.head(5)

Order Number,Line Number,Order Date,Delivery Date,Customer Name,Customer Gender,Customer Country,Customer Age,Store Name,Product Name,Color,Brand,Category,Subcategory,Quantity,Unit Price,Net Price,Unit Cost,Currency Code,Exchange Rate
i64,i64,date,date,str,str,str,i64,str,str,str,str,str,str,i64,f64,f64,f64,str,f64
284806,1,2017-10-18,2017-10-20,"""Eric Kennedy""","""Male""","""United States""",47,"""Online store""","""Contoso 512MB …","""Silver""","""Contoso""","""Audio""","""MP4&MP3""",7,11.691,10.288,5.958,"""USD""",1.0
285506,1,2017-10-25,2017-10-26,"""George Tooth""","""Male""","""Australia""",30,"""Online store""","""Contoso 512MB …","""Silver""","""Contoso""","""Audio""","""MP4&MP3""",1,11.691,11.691,5.958,"""AUD""",1.2967
311002,2,2018-07-07,2018-07-12,"""Caleb Greene""","""Male""","""Australia""",59,"""Online store""","""Contoso 512MB …","""Silver""","""Contoso""","""Audio""","""MP4&MP3""",6,12.99,12.99,6.62,"""AUD""",1.3484
366307,2,2020-01-11,2020-01-11,"""Isaac Siddins""","""Male""","""Australia""",25,"""Contoso Store …","""Contoso 512MB …","""Blue""","""Contoso""","""Audio""","""MP4&MP3""",4,12.99,12.99,6.62,"""AUD""",1.4545
325708,3,2018-12-01,2018-12-02,"""Mike McQueen""","""Male""","""United States""",56,"""Online store""","""Contoso 512MB …","""Blue""","""Contoso""","""Audio""","""MP4&MP3""",2,12.99,11.5611,6.62,"""USD""",1.0


In [58]:
# Column-wise totals for every numeric column picked by the selector.
# The selector matches on dtype only, so identifier-like integer columns
# (e.g. Order Number) are summed as well; those totals carry no meaning.
# `cs` (polars.selectors) is imported in the notebook's top import cell.
df.select(cs.numeric()).sum()

Order Number,Line Number,Customer Age,Quantity,Unit Price,Net Price,Unit Cost,Exchange Rate
i64,i64,i64,i64,f64,f64,f64,f64
4466019052,16195,725757,43517,4178500.0,3928600.0,1735600.0,14124.4597


In [59]:
# Pull the Quantity column out as a Series and aggregate it directly,
# without going through an expression.
s = df.get_column('Quantity')
s.sum()

43517

In [60]:
# Same total as the Series version, computed with an expression; the result
# is a one-row DataFrame rather than a Python scalar.
df.select(pl.sum('Quantity'))

Quantity
i64
43517


In [64]:
# First and last customer name in row order; each expression gets its own
# alias so the two outputs do not collide on the source column name.
df.select(
    pl.first('Customer Name').alias('Cust Name First'),
    pl.last('Customer Name').alias('Cust Name Last'),
)

Cust Name First,Cust Name Last
str,str
"""Eric Kennedy""","""Billy Ratliff"""


In [61]:
# Summary statistics for the numeric columns only.
(
    df
    .select(cs.numeric())
    .describe()
)

describe,Order Number,Line Number,Customer Age,Quantity,Unit Price,Net Price,Unit Cost,Exchange Rate
str,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",13915.0,13915.0,13915.0,13915.0,13915.0,13915.0,13915.0,13915.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",320949.985771,1.163852,52.15645,3.127345,300.28425,282.32739,124.731364,1.015053
"""std""",28431.79136,1.361349,19.133881,2.233597,405.538975,381.738847,147.944094,0.171927
"""min""",269500.0,0.0,19.0,1.0,0.95,0.8265,0.48,0.7015
"""25%""",295901.0,0.0,36.0,1.0,46.99,43.4,21.92,0.8965
"""50%""",319806.0,1.0,52.0,2.0,207.987,194.91,86.68,1.0
"""75%""",345106.0,2.0,68.0,4.0,361.2,336.0,160.93,1.0
"""max""",371503.0,6.0,85.0,10.0,3748.5,3748.5,1241.955,1.5373


### There is more...

In [49]:
# Summing a boolean expression counts the rows where it is true:
# here, the number of order lines with a quantity of at least 4.
df.select(pl.col('Quantity').ge(4).sum())

Quantity
u32
4423


In [47]:
# Conditional aggregation: total quantity restricted to rows whose
# Store Name is exactly 'Online store'. The filter is applied inside the
# expression, so the DataFrame itself is not filtered.
df.select(
    pl.col('Quantity')
    .filter(pl.col('Store Name').eq('Online store'))
    .sum()
)

Quantity
i64
25017
