In [2]:
import pandas as pd

# Sample data for the DataFrame
data = {
    'Category': ['A', 'B', 'A', 'B', 'C', 'A', 'C', 'B', 'C', 'A'],
    'Region': ['North', 'South', 'North', 'West', 'East', 'East', 'South', 'West', 'North', 'East'],
    'Item': ['Item1', 'Item2', 'Item3', 'Item4', 'Item5', 'Item6', 'Item7', 'Item8', 'Item9', 'Item10'],
    'Amount': [120, 200, 150, 80, 300, 400, 250, 190, 180, 220],
    'Value': [10, 15, 10, 5, 30, 25, 20, 18, 12, 24]
}

df = pd.DataFrame(data)

# Display the DataFrame
df


Unnamed: 0,Category,Region,Item,Amount,Value
0,A,North,Item1,120,10
1,B,South,Item2,200,15
2,A,North,Item3,150,10
3,B,West,Item4,80,5
4,C,East,Item5,300,30
5,A,East,Item6,400,25
6,C,South,Item7,250,20
7,B,West,Item8,190,18
8,C,North,Item9,180,12
9,A,East,Item10,220,24


In [4]:
# 1. **Basic Grouping**  
#    - **Question**: How do you group the `df` DataFrame by the `Category` column and calculate the sum for each group?  
#    - **Example**: Given the `df` DataFrame, calculate the total `Amount` for each `Category`.

df.groupby('Category')["Amount"].sum()

Category
A    890
B    470
C    730
Name: Amount, dtype: int64

In [6]:
# 2. **Multiple Columns Grouping**  
#    - **Question**: How do you group the `df` DataFrame by both `Region` and `Category` columns and find the mean `Amount` for each group?  
#    - **Example**: Calculate the mean `Amount` for each combination of `Region` and `Category`.

df.groupby(["Region","Category"])["Amount"].mean()

Region  Category
East    A           310.0
        C           300.0
North   A           135.0
        C           180.0
South   B           200.0
        C           250.0
West    B           135.0
Name: Amount, dtype: float64

In [7]:
# 3. **Aggregating with Different Functions**  
#    - **Question**: How do you apply different aggregation functions (`sum`, `mean`, `count`) to different columns when grouping by `Category`?  
#    - **Example**: After grouping by `Category`, calculate the sum of `Amount`, the mean of `Value`, and the count of `Item` in each group.

print("Mean: ",df.groupby("Category")["Amount"].mean())
print("Sum: ",df.groupby("Category")["Amount"].sum())
print("Count: ",df.groupby("Category")["Amount"].count())

Mean:  Category
A    222.500000
B    156.666667
C    243.333333
Name: Amount, dtype: float64
Sum:  Category
A    890
B    470
C    730
Name: Amount, dtype: int64
Count:  Category
A    4
B    3
C    3
Name: Amount, dtype: int64


In [11]:
# 4. **Using `agg()` Method**  
#    - **Question**: How do you use the `agg()` method to apply both `sum()` and `max()` to the `Amount` column after grouping by `Category`?  
#    - **Example**: Apply `sum()` and `max()` to the `Amount` column after grouping by `Category`.

df.groupby("Category")["Amount"].agg(["sum","max"])


Unnamed: 0_level_0,sum,max
Category,Unnamed: 1_level_1,Unnamed: 2_level_1
A,890,400
B,470,200
C,730,300


In [12]:
# 5. **Using `transform()` for Element-wise Transformations**  
#    - **Question**: How can you use the `transform()` method to standardize the `Value` column within each group (grouped by `Category`)?  
#    - **Example**: Standardize the `Value` column within each `Category` group by subtracting the mean and dividing by the standard deviation.


df.groupby("Category")["Value"].transform(lambda x: (x - x.mean())/ x.std())

0   -0.864997
1    0.342791
2   -0.864997
3   -1.126315
4    1.034910
5    0.924652
6   -0.073922
7    0.783523
8   -0.960988
9    0.805342
Name: Value, dtype: float64

In [16]:
# 6. **Grouping by Index Level**  
#    - **Question**: How do you group by an index level in a DataFrame with a multi-level index (e.g., `['Region', 'Category']`)?  
#    - **Example**: After setting a multi-level index with `Region` and `Category`, group by `Region` and calculate the mean `Amount` for each region.

df.groupby(["Region","Category"])["Amount"].mean()

Region  Category
East    A           310.0
        C           300.0
North   A           135.0
        C           180.0
South   B           200.0
        C           250.0
West    B           135.0
Name: Amount, dtype: float64

In [24]:
# 7. **Grouping and Filtering**  
#    - **Question**: How do you filter the groups in `df` based on a condition applied to the aggregation result, such as keeping only groups where the sum of `Amount` is greater than 400?  
#    - **Example**: Filter out groups where the total `Amount` for a `Category` is less than or equal to 400.

grouped = df.groupby("Category")["Amount"].sum()
grouped[grouped > 400]


Category
A    890
B    470
C    730
Name: Amount, dtype: int64

In [25]:
# 8. **Using `size()` to Count Groups**  
#    - **Question**: How can you count the number of rows in each group after grouping by `Category`?  
#    - **Example**: Use the `size()` function to count how many items exist in each `Category` group.

df.groupby("Category")["Item"].size()

Category
A    4
B    3
C    3
Name: Item, dtype: int64

In [26]:

# 9. **Custom Grouping Function**  
#    - **Question**: How do you apply a custom function to the grouped data to find the difference 
#                   between the maximum and minimum `Amount` for each group after grouping by `Category`?  
#    - **Example**: Define a custom function that calculates the difference between the maximum and minimum `Amount` 
#                   in each group and apply it using `groupby()`.

def min_max_diff(group):
    return group["Amount"].max() - group["Amount"].min()

df.groupby("Category").apply(min_max_diff)


  df.groupby("Category").apply(min_max_diff)


Category
A    280
B    120
C    120
dtype: int64

In [27]:
# 10. **Sorting and Renaming Grouped Results**  
#     - **Question**: After grouping by `Category`, how do you sort the results by the mean `Amount` in descending order and rename the columns to `Category` and `AverageAmount`?  
#     - **Example**: Sort the grouped `Category` DataFrame by the mean of `Amount` and rename the columns.


df.groupby("Category")["Amount"].sum().sort_values()

Category
B    470
C    730
A    890
Name: Amount, dtype: int64