Title: Grouping & Aggregating Data using Pandas<br>
Objective: Learn how to group data and perform aggregations on these groups.

Task 1: Grouping by a Single Column<br>

Task: Group the dataset by 'region' and calculate total sales per region.<br>
Steps:<br>
10. Load the dataset.<br>
11. Use groupby('region') on the DataFrame.<br>
12. Apply .sum() to the 'sales' column.

In [None]:
import pandas as pd

def missing_columns(df, required):
    """Return the names in *required* that are not columns of *df*.

    The order of *required* is preserved so the resulting error message
    is stable. An empty list means every required column is present.
    """
    return [name for name in required if name not in df.columns]


# Step 10: Load the dataset
# Replace 'sales_data.csv' with the actual path to your file if needed
try:
    sales_df = pd.read_csv('sales_data.csv')
    print("Dataset loaded successfully!")
    print("\nFirst few rows of your data:")
    print(sales_df.head())

    # Check the required columns before grouping so that every missing
    # column can be named in one clear message.
    absent = missing_columns(sales_df, ['region', 'sales'])
    if absent:
        listed = ", ".join(f"'{name}'" for name in absent)
        print(f"Error: Column(s) {listed} not found in the dataset. Please ensure your dataset has a 'region' and a 'sales' column.")
    else:
        # Step 11: Use groupby('region') on the DataFrame
        grouped_by_region = sales_df.groupby('region')

        # Step 12: Apply .sum() to the 'sales' column
        total_sales_per_region = grouped_by_region['sales'].sum()

        print("\nTotal Sales per Region:")
        print(total_sales_per_region)

except FileNotFoundError:
    print("Error: The file 'sales_data.csv' was not found. Please make sure the file is in the correct directory or provide the full path.")
except KeyError as e:
    # str(KeyError) is already the repr of the key (it includes quotes), so
    # it is interpolated without extra quotes to avoid printing ''name''.
    print(f"Error: Column {e} not found in the dataset. Please ensure your dataset has a 'region' and a 'sales' column.")
except Exception as e:
    # Last-resort handler so the tutorial cell reports the problem instead
    # of ending in a traceback.
    print(f"An unexpected error occurred: {e}")

Task 2: Grouping by Multiple Columns<br>

Task: Group the dataset by 'region' and 'category', then find the average sales.<br>
Steps:<br>
13. Group by ['region', 'category'].<br>
14. Use .mean() on the 'sales' column.<br>
15. Examine the structure of the result (a Series with a MultiIndex; use .reset_index() to turn it into a DataFrame).

In [None]:
import pandas as pd

def missing_columns(df, required):
    """Return the names in *required* that are not columns of *df*.

    The order of *required* is preserved so the resulting error message
    is stable. An empty list means every required column is present.
    """
    return [name for name in required if name not in df.columns]


# Step 10: Load the dataset (assuming 'sales_data.csv' is in the same directory)
try:
    sales_df = pd.read_csv('sales_data.csv')
    print("Dataset loaded successfully!")

    # Check the required columns before grouping so that every missing
    # column can be named in one clear message.
    absent = missing_columns(sales_df, ['region', 'category', 'sales'])
    if absent:
        listed = ", ".join(f"'{name}'" for name in absent)
        print(f"Error: Column(s) {listed} not found in the dataset. Please ensure your dataset has 'region', 'category', and 'sales' columns.")
    else:
        # Step 13: Group by ['region', 'category']
        grouped_by_region_category = sales_df.groupby(['region', 'category'])

        # Step 14: Use .mean() on the 'sales' column
        average_sales_per_region_category = grouped_by_region_category['sales'].mean()

        print("\nAverage Sales per Region and Category:")
        print(average_sales_per_region_category)

        # Step 15: Examine the structure of the result. Selecting a single
        # column before aggregating yields a Series; its index holds the
        # (region, category) group keys.
        print("\nType of the result:", type(average_sales_per_region_category))
        print("\nIndex of the result:")
        print(average_sales_per_region_category.index)

        # reset_index() moves the group keys back into ordinary columns,
        # producing a flat DataFrame.
        average_sales_df = average_sales_per_region_category.reset_index()
        print("\nResulting DataFrame structure:")
        print(average_sales_df)

except FileNotFoundError:
    print("Error: The file 'sales_data.csv' was not found. Please make sure the file is in the correct directory or provide the full path.")
except KeyError as e:
    # str(KeyError) is already the repr of the key (it includes quotes), so
    # it is interpolated without extra quotes to avoid printing ''name''.
    print(f"Error: Column {e} not found in the dataset. Please ensure your dataset has 'region', 'category', and 'sales' columns.")
except Exception as e:
    # Last-resort handler (e.g. a non-numeric 'sales' column makes .mean()
    # fail) so the cell reports the problem instead of ending in a traceback.
    print(f"An unexpected error occurred: {e}")

Task 3: Aggregating Multiple Functions<br>

Task: Group data by 'category' and apply multiple aggregation functions (sum and count) on 'quantity'.<br>
Steps:<br>
16. Group by 'category'.<br>
17. Use .agg(['sum', 'count']) on 'quantity'.<br>
18. Analyze the result to understand how multiple aggregations work.

In [None]:
import pandas as pd

def missing_columns(df, required):
    """Return the names in *required* that are not columns of *df*.

    The order of *required* is preserved so the resulting error message
    is stable. An empty list means every required column is present.
    """
    return [name for name in required if name not in df.columns]


# Step 10: Load the dataset (assuming 'sales_data.csv' is in the same directory)
try:
    sales_df = pd.read_csv('sales_data.csv')
    print("Dataset loaded successfully!")

    # Check the required columns before grouping so that every missing
    # column can be named in one clear message.
    absent = missing_columns(sales_df, ['category', 'quantity'])
    if absent:
        listed = ", ".join(f"'{name}'" for name in absent)
        print(f"Error: Column(s) {listed} not found in the dataset. Please ensure your dataset has 'category' and 'quantity' columns.")
    else:
        # Step 16: Group by 'category'
        grouped_by_category = sales_df.groupby('category')

        # Step 17: Use .agg(['sum', 'count']) on 'quantity'
        quantity_summary = grouped_by_category['quantity'].agg(['sum', 'count'])

        print("\nQuantity Summary by Category (Sum and Count):")
        print(quantity_summary)

        # Step 18: Analyze the result. Passing a list of functions to .agg()
        # gives one output column per function, with one row per category.
        print("\nType of the result:", type(quantity_summary))
        print("\nIndex of the result (Categories):")
        print(quantity_summary.index)
        print("\nColumns of the result (Aggregation functions):")
        print(quantity_summary.columns)

except FileNotFoundError:
    print("Error: The file 'sales_data.csv' was not found. Please make sure the file is in the correct directory or provide the full path.")
except KeyError as e:
    # str(KeyError) is already the repr of the key (it includes quotes), so
    # it is interpolated without extra quotes to avoid printing ''name''.
    print(f"Error: Column {e} not found in the dataset. Please ensure your dataset has 'category' and 'quantity' columns.")
except Exception as e:
    # Last-resort handler so the tutorial cell reports the problem instead
    # of ending in a traceback.
    print(f"An unexpected error occurred: {e}")