In [None]:
import pandas as pd

# Toy dataset: six employees, two in each of three departments.
data = {
    'Department': ['HR', 'HR', 'IT', 'IT', 'Finance', 'Finance'],
    'Employee': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank'],
    'Salary': [50000, 55000, 60000, 65000, 70000, 72000],
    'Experience': [2, 3, 5, 4, 6, 7],
}
df = pd.DataFrame(data)

# Per-department summary: total salary and mean experience.
# Named aggregation keeps the output column names equal to the source columns.
by_department = df.groupby('Department')
grouped = by_department.agg(
    Salary=('Salary', 'sum'),
    Experience=('Experience', 'mean'),
)
print(grouped)

            Salary  Experience
Department                    
Finance     142000         6.5
HR          105000         2.5
IT          125000         4.5


In [2]:
# Deviation of each salary from the mean salary of its own department.
# The built-in 'mean' transform broadcasts every group's mean back onto the
# original rows, so the subtraction is a single vectorized operation instead
# of a Python lambda evaluated once per group.
dept_mean_salary = df.groupby('Department')['Salary'].transform('mean')
df['Salary_relative'] = df['Salary'] - dept_mean_salary
print(df)

# Keep every row of the departments whose mean experience exceeds 4 years;
# departments failing the predicate are dropped as a whole.
filtered = df.groupby('Department').filter(
    lambda dept_rows: dept_rows['Experience'].mean() > 4
)
print(filtered)

  Department Employee  Salary  Experience  Salary_relative
0         HR    Alice   50000           2          -2500.0
1         HR      Bob   55000           3           2500.0
2         IT  Charlie   60000           5          -2500.0
3         IT    David   65000           4           2500.0
4    Finance      Eva   70000           6          -1000.0
5    Finance    Frank   72000           7           1000.0
  Department Employee  Salary  Experience  Salary_relative
2         IT  Charlie   60000           5          -2500.0
3         IT    David   65000           4           2500.0
4    Finance      Eva   70000           6          -1000.0
5    Finance    Frank   72000           7           1000.0


In [3]:
# Attach a gender code to each employee; the list is positional, so it must
# follow the current row order of df.
df['Gender'] = ['F', 'M', 'M', 'M', 'F', 'M']

# Aggregation spec: output column name -> (source column, reducer).
summary_spec = {
    'Total_Salary': pd.NamedAgg(column='Salary', aggfunc='sum'),
    'Avg_Experience': pd.NamedAgg(column='Experience', aggfunc='mean'),
}

# Group on two keys at once; the result is indexed by (Department, Gender).
multi_grouped = df.groupby(['Department', 'Gender']).agg(**summary_spec)
print(multi_grouped)

                   Total_Salary  Avg_Experience
Department Gender                              
Finance    F              70000             6.0
           M              72000             7.0
HR         F              50000             2.0
           M              55000             3.0
IT         M             125000             4.5


In [4]:
# Sort by employee name into a new frame, then append a 2-row rolling mean of
# Salary. The first row has no predecessor, so its rolling mean is NaN.
df_sorted = df.sort_values('Employee').assign(
    Salary_Rolling_Mean=lambda frame: frame['Salary'].rolling(window=2).mean()
)
print(df_sorted)

  Department Employee  Salary  Experience  Salary_relative Gender  \
0         HR    Alice   50000           2          -2500.0      F   
1         HR      Bob   55000           3           2500.0      M   
2         IT  Charlie   60000           5          -2500.0      M   
3         IT    David   65000           4           2500.0      M   
4    Finance      Eva   70000           6          -1000.0      F   
5    Finance    Frank   72000           7           1000.0      M   

   Salary_Rolling_Mean  
0                  NaN  
1              52500.0  
2              57500.0  
3              62500.0  
4              67500.0  
5              71000.0  


In [5]:
# Both statistics are computed over the same salary column, in the row order
# of df_sorted.
salary_in_order = df_sorted['Salary']

# Running mean over all rows seen so far.
df_sorted['Salary_Expanding_Mean'] = salary_in_order.expanding().mean()

# Exponentially weighted mean with smoothing factor alpha = 0.5.
df_sorted['Salary_EWMA'] = salary_in_order.ewm(alpha=0.5).mean()

print(df_sorted)

  Department Employee  Salary  Experience  Salary_relative Gender  \
0         HR    Alice   50000           2          -2500.0      F   
1         HR      Bob   55000           3           2500.0      M   
2         IT  Charlie   60000           5          -2500.0      M   
3         IT    David   65000           4           2500.0      M   
4    Finance      Eva   70000           6          -1000.0      F   
5    Finance    Frank   72000           7           1000.0      M   

   Salary_Rolling_Mean  Salary_Expanding_Mean   Salary_EWMA  
0                  NaN                50000.0  50000.000000  
1              52500.0                52500.0  53333.333333  
2              57500.0                55000.0  57142.857143  
3              62500.0                57500.0  61333.333333  
4              67500.0                60000.0  65806.451613  
5              71000.0                62000.0  68952.380952  


In [7]:
import pandas as pd

# Rebuild the base dataset so this cell runs on its own, without the extra
# columns that earlier cells added to df.
data = {
    'Department': ['HR', 'HR', 'IT', 'IT', 'Finance', 'Finance'],
    'Employee': ['Alice', 'Bob', 'Charlie', 'David', 'Eva', 'Frank'],
    'Salary': [50000, 55000, 60000, 65000, 70000, 72000],
    'Experience': [2, 3, 5, 4, 6, 7]
}

df = pd.DataFrame(data)


def value_range(values):
    """Return the spread (maximum minus minimum) of one group's values."""
    return values.max() - values.min()


# Named aggregation: each keyword is an output column, given as a
# (source column, reducer) tuple. The reducer may be a custom callable
# (value_range) or the name of a built-in reduction ('sum'); the built-in
# name is preferred over wrapping it in a lambda.
custom_agg = df.groupby('Department').agg(
    Salary_Range=('Salary', value_range),
    Total_Experience=('Experience', 'sum')
)
print(custom_agg)

            Salary_Range  Total_Experience
Department                                
Finance             2000                13
HR                  5000                 5
IT                  5000                 9
