In [1]:
import pandas as pd
import numpy as np


### Aggregation Methods in Pandas
Aggregation methods in pandas let you compute summary statistics (e.g., mean, median, sum) for groups of data in a DataFrame or Series. Aggregation is typically used together with `groupby()`, but it can also be applied directly to an entire DataFrame or Series.



In [8]:
# Small employee table used by every example below: one row per employee,
# with the department they belong to and their salary.
data = {
    "Department": ["HR", "IT", "HR", "IT", "Finance", "Finance", "IT"],
    "Employee": ["Alice", "Bob", "Charlie", "David", "Eve", "Frank", "Grace"],
    "Salary": [50000, 60000, 55000, 70000, 80000, 75000, 72000],
}
df = pd.DataFrame(data)
df

Unnamed: 0,Department,Employee,Salary
0,HR,Alice,50000
1,IT,Bob,60000
2,HR,Charlie,55000
3,IT,David,70000
4,Finance,Eve,80000
5,Finance,Frank,75000
6,IT,Grace,72000


In [20]:
# Total salary per department: split rows by Department, then sum Salary.
grouped = (
    df.groupby("Department")["Salary"]
    .sum()
)
grouped

Department
Finance    155000
HR         105000
IT         202000
Name: Salary, dtype: int64

In [22]:
# Average salary per department: split rows by Department, then average Salary.
grouped1 = (
    df.groupby("Department")["Salary"]
    .mean()
)
grouped1

Department
Finance    77500.000000
HR         52500.000000
IT         67333.333333
Name: Salary, dtype: float64

In [26]:
# Several aggregations at once: passing a list of function names to .agg()
# yields one output column per function, for each department.
grouped2 = (
    df.groupby("Department")["Salary"]
    .agg(["mean", "sum", "median", "max", "min", "count"])
)
grouped2

Unnamed: 0_level_0,mean,sum,median,max,min,count
Department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Finance,77500.0,155000,77500.0,80000,75000,2
HR,52500.0,105000,52500.0,55000,50000,2
IT,67333.333333,202000,70000.0,72000,60000,3


In [28]:
# Aggregate without grouping: the Salary column is selected with a list
# (df[["Salary"]]) so it stays a one-column DataFrame, and .agg() with a list
# of function names returns one row per statistic.
salary_frame = df[["Salary"]]
result = salary_frame.agg(["mean", "median", "sum", "count"])
result


Unnamed: 0,Salary
mean,66000.0
median,70000.0
sum,462000.0
count,7.0


In [30]:
# Custom aggregation function: spread of a group (max - min).
def salary_range(x):
    """Return the difference between the largest and smallest value in ``x``.

    ``x`` only needs to provide ``.max()`` and ``.min()``.
    """
    highest = x.max()
    lowest = x.min()
    return highest - lowest
    
# Mix a built-in aggregation (by name) with the custom salary_range function;
# the custom function's column is labelled with the function's name.
grouped3 = (
    df.groupby("Department")["Salary"]
    .agg(["mean", salary_range])
)
grouped3

Unnamed: 0_level_0,mean,salary_range
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,77500.0,5000
HR,52500.0,5000
IT,67333.333333,12000


In [40]:
# Aggregation without groupby: reduce the whole Salary column to a single total.
salary_column = df["Salary"]
total_salary = salary_column.sum()
total_salary



462000

In [42]:
# Number of non-null Salary entries. This equals the number of employees only
# as long as no row is missing a salary.
salary_values = df["Salary"]
count_employee = salary_values.count()
count_employee

7