* Aggregate functions reduce a column of values to a single summary value: min, max, mean, median, and so on.

In [18]:
from random import seed, shuffle

import numpy as np
import pandas as pd

In [19]:
# Seed Python's global random generator so the shuffled labels (and every
# output derived from them) are identical after Restart Kernel -> Run All.
SEED = 42
seed(SEED)

# 100 labels each: four product groups x 25 and two suppliers x 50.
pg = ['A', 'B', 'C', 'D'] * 25
supplier = ['S1', 'S2'] * 50

# shuffle() reorders the lists in place, randomizing which position
# receives which group / supplier label.
shuffle(pg)
shuffle(supplier)

In [28]:
# Seeded generator: the synthetic prices and quantities are the same on every
# run, so the outputs shown further down can be reproduced.
rng = np.random.default_rng(42)

# Synthetic product table with 100 rows (product codes 100..199).
df = pd.DataFrame({
    'product_code': np.arange(100, 200),
    'product_group': pg,
    'supplier': supplier,
    # uniform draws in [0, 1) scaled by 5, rounded to 2 decimals
    'price': np.round(rng.random(100) * 5, 2),
    # integers in [10, 200) (upper bound exclusive)
    'sales_qty': rng.integers(10, 200, size=100),
})
df.head()

Unnamed: 0,product_code,product_group,supplier,price,sales_qty
0,100,C,S2,0.86,57
1,101,B,S1,3.29,132
2,102,B,S2,3.65,199
3,103,A,S2,4.34,116
4,104,D,S2,4.93,145


In [29]:
# Column-wise mean of the two numeric measures.
df.loc[:, ['price', 'sales_qty']].mean()

price          2.5493
sales_qty    101.7500
dtype: float64

In [30]:
# Multiple aggregations of a single column: pass a list of function names
# to agg(), the dedicated aggregation method. The result is a Series with
# one entry per function.

df['price'].agg(['sum', 'min', 'max'])

sum    254.93
min      0.00
max      4.99
Name: price, dtype: float64

In [32]:
# Multiple aggregations over multiple columns: agg() with a list of function
# names returns one row per function and one column per selected column.

df[['price', 'sales_qty']].agg(['mean', 'median'])

Unnamed: 0,price,sales_qty
mean,2.5493,101.75
median,2.75,103.0


In [34]:
# Way 1: a different list of aggregations per column.
# The dict maps column name -> list of function names; a function that is
# not requested for a column shows up as NaN in that column.

df.agg({
    'price': ['sum', 'min', 'max', 'mean'],
    'sales_qty': ['sum', 'mean', 'max'],
})

Unnamed: 0,price,sales_qty
sum,254.93,10175.0
min,0.0,
max,4.99,199.0
mean,2.5493,101.75


In [36]:
# Way 2: exactly one aggregation per column.
# The dict maps column name -> single function name, so agg() returns a
# Series indexed by column name.

df[['price', 'sales_qty']].agg({
    'price': 'mean',
    'sales_qty': 'median',
})

price          2.5493
sales_qty    103.0000
dtype: float64

In [37]:
# Way 3, using the agg() function with a list of function names:
# every listed aggregation is computed for every selected column.

df[['price', 'sales_qty']].agg(['mean', 'median'])

Unnamed: 0,price,sales_qty
mean,2.5493,101.75
median,2.75,103.0


* The agg function also allows assigning custom names to the aggregated results (named aggregation).

In [38]:
# Named aggregation: each keyword is the label of one result, and its
# (column, function) tuple says which column to aggregate and how.
df[['price', 'sales_qty']].agg(
    avg_price=('price', 'mean'),
    max_price=('price', 'max'),
    median_value=('sales_qty', 'median'),
)

Unnamed: 0,price,sales_qty
avg_price,2.5493,
max_price,4.99,
median_value,,103.0


In [40]:
# Named aggregation combined with groupby: one row per product group, and
# every keyword becomes an output column computed from its
# (source column, function) pair.

(
    df.groupby('product_group')
      .agg(
          avg_price=('price', 'mean'),
          max_price=('price', 'max'),
          median_value=('sales_qty', 'median'),
          product_count=('product_code', 'count'),
      )
)

Unnamed: 0_level_0,avg_price,max_price,median_value,product_count
product_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,2.3468,4.73,116.0,25
B,2.8236,4.99,115.0,25
C,2.496,4.68,93.0,25
D,2.5308,4.93,80.0,25


* The describe() function returns several summary statistics at once (count, mean, std, min, quartiles, max).

In [41]:
# Summary statistics for the two numeric measures in a single table.
df.loc[:, ['price', 'sales_qty']].describe()

Unnamed: 0,price,sales_qty
count,100.0,100.0
mean,2.5493,101.75
std,1.542646,53.736427
min,0.0,11.0
25%,1.1725,56.5
50%,2.75,103.0
75%,4.0,144.25
max,4.99,199.0
