In [27]:
import pandas as pd

# Toy sales ledger, written one record per line so each row can be read
# across; it is transposed into the column-oriented dict that the
# DataFrame constructor receives.
column_names = ['Region', 'Category', 'Sales', 'Expenses']
records = [
    ('North', 'A', 100, 30),
    ('North', 'B', 150, 45),
    ('South', 'A', 200, 50),
    ('South', 'B', 250, 60),
    ('East', 'A', 300, 75),
    ('East', 'B', 350, 85),
    ('West', 'A', 400, 90),
    ('West', 'B', 450, 100),
    ('North', 'A', 500, 110),
]

# zip(*records) turns the rows into per-column tuples; store them as lists.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*records))
}

df = pd.DataFrame(data)
df


Unnamed: 0,Region,Category,Sales,Expenses
0,North,A,100,30
1,North,B,150,45
2,South,A,200,50
3,South,B,250,60
4,East,A,300,75
5,East,B,350,85
6,West,A,400,90
7,West,B,450,100
8,North,A,500,110


In [28]:
# Attach each region's mean Sales to every row of that region.
# transform (unlike agg) returns a result aligned to df's own index,
# so it can be assigned directly as a new column.
sales_by_region = df.groupby('Region')['Sales']
df['Region_Average'] = sales_by_region.transform('mean')
df



Unnamed: 0,Region,Category,Sales,Expenses,Region_Average
0,North,A,100,30,250.0
1,North,B,150,45,250.0
2,South,A,200,50,225.0
3,South,B,250,60,225.0
4,East,A,300,75,325.0
5,East,B,350,85,325.0
6,West,A,400,90,425.0
7,West,B,450,100,425.0
8,North,A,500,110,250.0


In [29]:
# Centre each sale on the mean of its own region (value minus region mean),
# so positive numbers are above-average sales for that region.
# The built-in 'mean' aggregation is broadcast back onto the original rows by
# transform, and the subtraction is then one vectorised operation instead of
# a Python lambda invoked once per group.
region_mean_sales = df.groupby('Region')['Sales'].transform('mean')
df['Centered_Sales'] = df['Sales'] - region_mean_sales
df


Unnamed: 0,Region,Category,Sales,Expenses,Region_Average,Centered_Sales
0,North,A,100,30,250.0,-150.0
1,North,B,150,45,250.0,-100.0
2,South,A,200,50,225.0,-25.0
3,South,B,250,60,225.0,25.0
4,East,A,300,75,325.0,-25.0
5,East,B,350,85,325.0,25.0
6,West,A,400,90,425.0,-25.0
7,West,B,450,100,425.0,25.0
8,North,A,500,110,250.0,250.0


In [30]:
# Min-max scale Sales within each region: the region's smallest sale maps to
# 0.0 and its largest to 1.0.
region_sales = df.groupby('Region')['Sales']
region_min = region_sales.transform('min')
region_range = region_sales.transform('max') - region_min

# A region with a single row (or with all-equal sales) has a zero range, and
# 0 / 0 would silently produce NaN. Every numerator in such a region is 0, so
# dividing by 1 instead maps those rows to 0.0. Regions with a non-zero range
# are computed exactly as (x - min) / (max - min).
safe_range = region_range.where(region_range != 0, 1)
df['Normalized_Sales'] = (df['Sales'] - region_min) / safe_range
df


Unnamed: 0,Region,Category,Sales,Expenses,Region_Average,Centered_Sales,Normalized_Sales
0,North,A,100,30,250.0,-150.0,0.0
1,North,B,150,45,250.0,-100.0,0.125
2,South,A,200,50,225.0,-25.0,0.0
3,South,B,250,60,225.0,25.0,1.0
4,East,A,300,75,325.0,-25.0,0.0
5,East,B,350,85,325.0,25.0,1.0
6,West,A,400,90,425.0,-25.0,0.0
7,West,B,450,100,425.0,25.0,1.0
8,North,A,500,110,250.0,250.0,1.0


In [31]:
# Row-wise ratio of revenue to cost (no grouping involved).
# NOTE(review): a row with Expenses == 0 would yield inf (or NaN for 0 / 0)
# rather than raising; none of the rows shown here has zero expenses.
sales_per_expense = df['Sales'].div(df['Expenses'])
df['Sales_Expenses_Ratio'] = sales_per_expense
df


Unnamed: 0,Region,Category,Sales,Expenses,Region_Average,Centered_Sales,Normalized_Sales,Sales_Expenses_Ratio
0,North,A,100,30,250.0,-150.0,0.0,3.333333
1,North,B,150,45,250.0,-100.0,0.125,3.333333
2,South,A,200,50,225.0,-25.0,0.0,4.0
3,South,B,250,60,225.0,25.0,1.0,4.166667
4,East,A,300,75,325.0,-25.0,0.0,4.0
5,East,B,350,85,325.0,25.0,1.0,4.117647
6,West,A,400,90,425.0,-25.0,0.0,4.444444
7,West,B,450,100,425.0,25.0,1.0,4.5
8,North,A,500,110,250.0,250.0,1.0,4.545455


In [32]:
def _beyond_two_sigma(group_sales):
    """Flag values lying more than two standard deviations from the group mean.

    Uses the group's own mean and its sample standard deviation
    (``Series.std`` defaults to ddof=1).
    """
    distance_from_mean = (group_sales - group_sales.mean()).abs()
    return distance_from_mean > 2 * group_sales.std()


# NOTE(review): with the sample standard deviation a value can be at most
# (n - 1) / sqrt(n) deviations away from its group mean, which is below 2 for
# any group with fewer than 6 rows. The regions here have 2 or 3 rows, so
# every row comes out False regardless of the values.
df['Is_Outlier'] = df.groupby('Region')['Sales'].transform(_beyond_two_sigma)
df


Unnamed: 0,Region,Category,Sales,Expenses,Region_Average,Centered_Sales,Normalized_Sales,Sales_Expenses_Ratio,Is_Outlier
0,North,A,100,30,250.0,-150.0,0.0,3.333333,False
1,North,B,150,45,250.0,-100.0,0.125,3.333333,False
2,South,A,200,50,225.0,-25.0,0.0,4.0,False
3,South,B,250,60,225.0,25.0,1.0,4.166667,False
4,East,A,300,75,325.0,-25.0,0.0,4.0,False
5,East,B,350,85,325.0,25.0,1.0,4.117647,False
6,West,A,400,90,425.0,-25.0,0.0,4.444444,False
7,West,B,450,100,425.0,25.0,1.0,4.5,False
8,North,A,500,110,250.0,250.0,1.0,4.545455,False


In [33]:
# Label each sale by its position within its own region: the region's sales
# are cut into equal-frequency bins, one per tier name.
# NOTE(review): pd.qcut raises ValueError when quantile edges coincide (e.g. a
# region with tied values or a single row); the data shown here has no ties.
tier_names = ['Low', 'Medium', 'High']
sales_by_region = df.groupby('Region')['Sales']
df['Sales_Classification'] = sales_by_region.transform(
    lambda region_sales: pd.qcut(region_sales, q=len(tier_names), labels=tier_names)
)
df


Unnamed: 0,Region,Category,Sales,Expenses,Region_Average,Centered_Sales,Normalized_Sales,Sales_Expenses_Ratio,Is_Outlier,Sales_Classification
0,North,A,100,30,250.0,-150.0,0.0,3.333333,False,Low
1,North,B,150,45,250.0,-100.0,0.125,3.333333,False,Medium
2,South,A,200,50,225.0,-25.0,0.0,4.0,False,Low
3,South,B,250,60,225.0,25.0,1.0,4.166667,False,High
4,East,A,300,75,325.0,-25.0,0.0,4.0,False,Low
5,East,B,350,85,325.0,25.0,1.0,4.117647,False,High
6,West,A,400,90,425.0,-25.0,0.0,4.444444,False,Low
7,West,B,450,100,425.0,25.0,1.0,4.5,False,High
8,North,A,500,110,250.0,250.0,1.0,4.545455,False,High
