In [1]:
import pandas as pd
import numpy as np


In pandas, you can apply custom functions to aggregate grouped data or to transform a column using the `.apply()`, `.agg()`, or `.transform()` methods. Each of these methods accepts a custom function that operates on a DataFrame, a Series, or a group of rows.

In [9]:
# 1. Using .apply() for Row or Column Operations
# Three-row employee table: one text column and two numeric columns.
data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Salary": [50000, 60000, 55000],
    "Age": [25, 30, 35],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Salary,Age
0,Alice,50000,25
1,Bob,60000,30
2,Charlie,55000,35


In [11]:
# Custom function: spread between the largest and smallest value (here: Salary and Age)
def calculate_range(row):
    """Return ``row.max() - row.min()``.

    ``row`` may be any object exposing ``.max()`` and ``.min()``; when used
    with ``DataFrame.apply(..., axis=1)`` it is a Series holding one row.
    """
    highest = row.max()
    lowest = row.min()
    return highest - lowest
# Apply the custom function row-wise (axis=1): each row's Salary/Age pair is
# passed to calculate_range as a Series and the result is stored in 'range'.
df['range'] = df[['Salary', 'Age']].apply(calculate_range, axis=1)
# Display the updated frame (the original cell ended with the undefined name `d`).
df

Unnamed: 0,Name,Salary,Age,range
0,Alice,50000,25,49975
1,Bob,60000,30,59970
2,Charlie,55000,35,54965


In [18]:
# Seven employees across three departments, used for the group-by examples.
data = {
    "Department": ["HR", "IT", "HR", "IT", "Finance", "Finance", "IT"],
    "Salary": [50000, 60000, 55000, 70000, 80000, 75000, 72000],
    "Age": [25, 30, 28, 35, 45, 42, 38],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Department,Salary,Age
0,HR,50000,25
1,IT,60000,30
2,HR,55000,28
3,IT,70000,35
4,Finance,80000,45
5,Finance,75000,42
6,IT,72000,38


In [26]:
# Group by department and apply custom aggregation functions
def age_range(x):
    """Return the spread (maximum minus minimum) of the values in ``x``.

    Passed to ``.agg()`` as the custom aggregation for the 'Age' column,
    where ``x`` is the group's values for that column.
    """
    oldest = x.max()
    youngest = x.min()
    return oldest - youngest
    
# Named aggregation: each keyword becomes an output column, built from the
# given source column and aggregation (a built-in name or a custom callable).
grouped = df.groupby("Department").agg(
    total_salary=pd.NamedAgg(column="Salary", aggfunc="sum"),
    range=pd.NamedAgg(column="Age", aggfunc=age_range),
)
grouped

Unnamed: 0_level_0,total_salary,range
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Finance,155000,3
HR,105000,3
IT,202000,8


In [28]:
# Re-create the three-row employee table for the .transform() example.
data = {
    "Name": ["Alice", "Bob", "Charlie"],
    "Salary": [50000, 60000, 55000],
    "Age": [25, 30, 35],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Salary,Age
0,Alice,50000,25
1,Bob,60000,30
2,Charlie,55000,35


In [30]:
# Custom function to normalize a series (z-score)
def normalize(series):
    """Centre ``series`` on its mean and scale it by its standard deviation.

    Computes ``(series - series.mean()) / series.std()``; the standard
    deviation is whatever ``series.std()`` returns for the given object.
    """
    centered = series - series.mean()
    return centered / series.std()

# Apply normalization on the 'Salary' column using .transform() and store the
# result, which has one value per original row, in a new 'Normalized_Salary' column.
df['Normalized_Salary'] = df['Salary'].transform(normalize)

df

Unnamed: 0,Name,Salary,Age,Normalized_Salary
0,Alice,50000,25,-1.0
1,Bob,60000,30,1.0
2,Charlie,55000,35,0.0


In [34]:
# Department table extended with an 'Experience' column for the
# multi-aggregation example.
data = {
    "Department": ["HR", "IT", "HR", "IT", "Finance", "Finance", "IT"],
    "Salary": [50000, 60000, 55000, 70000, 80000, 75000, 72000],
    "Age": [25, 30, 28, 35, 45, 42, 38],
    "Experience": [2, 5, 3, 7, 10, 9, 8],  # Years of experience
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Department,Salary,Age,Experience
0,HR,50000,25,2
1,IT,60000,30,5
2,HR,55000,28,3
3,IT,70000,35,7
4,Finance,80000,45,10
5,Finance,75000,42,9
6,IT,72000,38,8


In [36]:

# Custom function to calculate range (max - min)
def age_range(x):
    """Return the difference between the largest and smallest value in ``x``."""
    upper, lower = x.max(), x.min()
    return upper - lower

# Custom function to calculate variance
def salary_variance(x):
    """Return the variance of ``x`` as computed by ``np.var``.

    ``ddof=0`` is NumPy's default and is spelled out here on purpose: the
    result is the population variance (divide by N), whereas pandas' built-in
    ``'std'``/``'var'`` aggregations default to ``ddof=1`` (divide by N - 1).
    """
    return np.var(x, ddof=0)

# Group by department and compute several named aggregations at once.
# Each keyword is an output column; NamedAgg names the source column and the
# aggregation, which is either a built-in name or one of the custom functions.
grouped = df.groupby("Department").agg(
    # Salary statistics per department
    total_salary=pd.NamedAgg(column="Salary", aggfunc="sum"),
    avg_salary=pd.NamedAgg(column="Salary", aggfunc="mean"),
    salary_variance=pd.NamedAgg(column="Salary", aggfunc=salary_variance),  # custom function
    max_salary=pd.NamedAgg(column="Salary", aggfunc="max"),
    # Age statistics per department
    avg_age=pd.NamedAgg(column="Age", aggfunc="mean"),
    age_range=pd.NamedAgg(column="Age", aggfunc=age_range),  # custom function
    age_stddev=pd.NamedAgg(column="Age", aggfunc="std"),
)
grouped

Unnamed: 0_level_0,total_salary,avg_salary,salary_variance,max_salary,avg_age,age_range,age_stddev
Department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Finance,155000,77500.0,6250000.0,80000,43.5,3,2.12132
HR,105000,52500.0,6250000.0,55000,26.5,3,2.12132
IT,202000,67333.333333,27555560.0,72000,34.333333,8,4.041452
