Title: Grouping & Aggregating Data using Pandas<br>
Objective: Learn how to group data and perform aggregations on these groups.

Task 1: Grouping by a Single Column<br>

Task: Group the dataset by 'region' and calculate total sales per region.<br>
Steps:<br>
10. Load the dataset.<br>
11. Use groupby('region') on the DataFrame.<br>
12. Apply .sum() to the 'sales' column.

In [4]:
import pandas as pd
import os

# Build the dataset path from the current working directory.
current_dir = os.getcwd()
file_path = os.path.join(current_dir, 'dataset.csv')

try:
    df = pd.read_csv(file_path)
except FileNotFoundError as e:
    # Only a missing file is reported; any other error propagates.
    print(f"Error loading dataset: {e}")
else:
    # Total sales per region.
    result = df.groupby('region')['sales'].sum()
    print(result)


Error loading dataset: [Errno 2] No such file or directory: '/workspaces/AI---ML/src/Module 2/Data Aggreation using Python/dataset.csv'


Task 2: Grouping by Multiple Columns<br>

Task: Group the dataset by 'region' and 'category', then find the average sales.<br>
Steps:<br>
13. Group by ['region', 'category'].<br>
14. Use .mean() on the 'sales' column.<br>
15. Examine the resulting DataFrame structure.

In [3]:
import pandas as pd

# Load the dataset
def load_dataset(file_path):
    """Read the CSV at ``file_path`` into a DataFrame.

    Returns the loaded DataFrame. If ``pd.read_csv`` raises any exception,
    the error is printed and ``None`` is returned instead.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as exc:
        # Report the failure and signal it to the caller with None.
        print("Error loading dataset:", str(exc))
        return None

# Group by 'region' and 'category', then find the average sales
def calculate_average_sales_per_group(data):
    """Compute the mean of 'sales' for each ('region', 'category') pair.

    Returns a DataFrame with the group keys restored as ordinary columns
    (via ``reset_index``) alongside the averaged 'sales' column.
    Returns ``None``, after printing a message, when a required column is
    absent, when 'sales' is not numeric, or when any exception is raised.
    """
    group_keys = ['region', 'category']
    try:
        # Every grouping key and the value column must be present.
        if any(name not in data.columns for name in group_keys + ['sales']):
            print("Error: One or more required columns are missing.")
            return None

        # Averaging only makes sense on a numeric column.
        sales_is_numeric = pd.api.types.is_numeric_dtype(data['sales'])
        if not sales_is_numeric:
            print("Error: 'sales' column is not numeric.")
            return None

        group_means = data.groupby(group_keys)['sales'].mean()
        return group_means.reset_index()
    except Exception as exc:
        print("Error calculating average sales per group:", str(exc))
        return None

# Main function
def main():
    """Prompt for a dataset path and print the average sales per group.

    Nothing is printed here when loading or aggregation fails; those
    helpers report their own errors and return None.
    """
    file_path = input("Enter the path to your dataset file: ")

    data = load_dataset(file_path)
    if data is None:
        return

    averages = calculate_average_sales_per_group(data)
    if averages is None:
        return

    print("Average Sales per Group:")
    print(averages)


# Run only when executed directly, not when imported.
if __name__ == "__main__":
    main()






KeyboardInterrupt: Interrupted by user

Task 3: Aggregating Multiple Functions<br>

Task: Group data by 'category' and apply multiple aggregation functions (sum and count) on 'quantity'.<br>
Steps:<br>
16. Group by 'category'.<br>
17. Use .agg(['sum', 'count']) on 'quantity'.<br>
18. Analyze the result to understand how multiple aggregations work.

In [None]:
import pandas as pd

# Load the dataset
def load_dataset(file_path):
    """Read the CSV at ``file_path`` into a DataFrame.

    Returns the loaded DataFrame. If ``pd.read_csv`` raises any exception,
    the error is printed and ``None`` is returned instead.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as exc:
        # Report the failure and signal it to the caller with None.
        print("Error loading dataset:", str(exc))
        return None

# Group by 'category' and apply multiple aggregation functions
def aggregate_quantity(data):
    """Compute the sum and count of 'quantity' for each 'category'.

    Returns a DataFrame with 'category' restored as an ordinary column
    (via ``reset_index``) next to the 'sum' and 'count' results.
    Returns ``None``, after printing a message, when a required column is
    absent, when 'quantity' is not numeric, or when any exception is raised.
    """
    try:
        # Both the grouping key and the value column must be present.
        if any(name not in data.columns for name in ('category', 'quantity')):
            print("Error: One or more required columns are missing.")
            return None

        # Summation requires a numeric column.
        quantity_is_numeric = pd.api.types.is_numeric_dtype(data['quantity'])
        if not quantity_is_numeric:
            print("Error: 'quantity' column is not numeric.")
            return None

        per_category = data.groupby('category')['quantity']
        summary = per_category.agg(['sum', 'count'])
        return summary.reset_index()
    except Exception as exc:
        print("Error aggregating quantity:", str(exc))
        return None

# Main function
def main():
    """Load 'dataset.csv' and print the per-category quantity summary.

    Nothing is printed here when loading or aggregation fails; those
    helpers report their own errors and return None.
    """
    file_path = 'dataset.csv'  # Update with your dataset file path

    data = load_dataset(file_path)
    if data is None:
        return

    summary = aggregate_quantity(data)
    if summary is None:
        return

    print("Aggregated Quantity:")
    print(summary)


# Run only when executed directly, not when imported.
if __name__ == "__main__":
    main()

