# Day 4 - Pandas: Data Manipulation I (Superstore Dataset)
This notebook uses the real-world Superstore retail dataset to practice accessing, filtering, cleaning, grouping, and merging data with pandas.

## Import Libraries and Load Dataset

In [4]:
import pandas as pd

# Location of the Superstore CSV, named once so it is easy to repoint.
DATA_PATH = '2e30ffd1-2b0e-4bcc-9fb5-c8c9c112834e.csv'

# Load the Superstore dataset; the file is decoded as ISO-8859-1.
df = pd.read_csv(DATA_PATH, encoding='ISO-8859-1')

# Normalise column names to snake_case: trim surrounding whitespace,
# lowercase, then turn spaces and hyphens into underscores.
# regex=False keeps both replacements literal instead of relying on the
# pandas default, which differs between pandas 1.x and 2.x.
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
    .str.replace('-', '_', regex=False)
)

# Show the first few rows
df.head()


ModuleNotFoundError: No module named 'pandas'

## Task 1: Indexing and Slicing

In [None]:
# Label-based access (.loc): the 'sales' value in the row labelled 0.
print(df.loc[0, 'sales'])
# Position-based access (.iloc): first three rows, columns at positions 3-5.
print(df.iloc[:3, 3:6])

# Column subset by name
summary_columns = ['region', 'sales', 'profit']
print(df[summary_columns])

# Row subset via a boolean mask: keep rows whose sales exceed 500
high_sales_mask = df['sales'] > 500
print(df[high_sales_mask])


## Task 2: Filtering and Sorting

In [None]:
# Filter rows with sales > 500 and region is 'South'
filtered = df[(df['sales'] > 500) & (df['region'] == 'South')]
print(filtered[['order_id', 'sales', 'region']])

# Two independent orderings, each stored in its own variable:
#   - by profit, highest first
#   - by category, then by sales within each category (both ascending)
# The results are kept in plain variables rather than assigned as attributes
# on `df`: attribute assignment does not create a column, triggers a pandas
# UserWarning, and the attribute does not survive copies of the frame.
df_sorted_by_profit = df.sort_values('profit', ascending=False)
df_sorted_by_category_sales = df.sort_values(by=['category', 'sales'])
print(df_sorted_by_profit.head())
print(df_sorted_by_category_sales.head())


## Task 3: Handling Missing Data

In [None]:
# Count the missing entries in each column
missing_per_column = df.isna().sum()
print(missing_per_column)

# Two alternative treatments, each returning a new frame and leaving df as is:
#   df_filled  - every missing entry replaced with 0
#   df_dropped - every row that contains at least one missing entry removed
df_filled = df.fillna(value=0)
df_dropped = df.dropna(axis=0, how='any')

# Preview the filled frame
print(df_filled.head())


## Task 4: GroupBy Operations

In [None]:
# Mean of the sales column within each category
sales_by_category = df.groupby('category')['sales']
print(sales_by_category.mean())

# Per-region sum, mean and count of the sales column; reset_index() turns
# the region labels back into an ordinary column
sales_by_region = df.groupby('region')['sales']
region_stats = sales_by_region.agg(['sum', 'mean', 'count']).reset_index()
print(region_stats)


## Task 5: Merging Example

In [None]:
# Derive two small demo tables from the same frame:
#   orders    - the order_id / customer_id columns, one row per row of df
#   customers - the distinct customer_id / customer_name pairs
order_columns = ['order_id', 'customer_id']
customer_columns = ['customer_id', 'customer_name']
orders = df[order_columns]
customers = df[customer_columns].drop_duplicates()

# Inner-join the two tables on their shared customer_id key
merged = orders.merge(customers, on='customer_id', how='inner')
print(merged.head())
