# Retail Sales Analytics — EDA

Objectives:
1. Load and clean data
2. Compute KPIs (Total Sales, Total Profit, overall Profit Margin, Average Discount)
3. Explore trends and drivers (monthly sales trend, profit by region)
4. Capture business insights


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
# Relative path to the raw extract (resolved against the current working directory).
DATA_PATH = Path('..', 'data', 'retail_sales.csv')

# Let pandas display up to 50 columns before truncating wide frames.
pd.set_option('display.max_columns', 50)

# Load the CSV, parsing Order_Date as datetimes at read time.
df = pd.read_csv(
    DATA_PATH,
    parse_dates=['Order_Date'],
)
df.head()

In [None]:
# Cleaning and feature engineering

# Remove fully duplicated rows before deriving any columns.
df = df.drop_duplicates()

# Row-level margin: Profit / Sales where Sales is positive, otherwise 0.0
# (rows with zero, negative or missing Sales all fall back to 0.0).
df['Profit_Margin'] = np.where(
    df['Sales'] > 0,
    df['Profit'] / df['Sales'],
    0.0,
)

# Monthly period of each order, stored as a string label (e.g. '2023-01').
df['YearMonth'] = (
    df['Order_Date']
    .dt.to_period('M')
    .astype(str)
)

df.info()

In [None]:
# KPIs
# Headline metrics over `df`, stored as built-in floats.
# The totals are computed once and reused for the overall margin.
total_sales = float(df['Sales'].sum())
total_profit = float(df['Profit'].sum())

kpis = {
    'Total_Sales': total_sales,
    'Total_Profit': total_profit,
    'Avg_Discount': float(df['Discount'].mean()),
    # Overall margin = total profit / total sales. When total sales is zero
    # (e.g. an empty frame) report 0.0 rather than nan/inf, matching the 0.0
    # fallback used for the row-level Profit_Margin column.
    'Profit_Margin': total_profit / total_sales if total_sales != 0 else 0.0,
}
kpis

In [None]:
# Monthly trend
# Sum Sales per YearMonth (kept as a regular column), sorted by YearMonth;
# the cell displays the last 10 rows.
monthly = (
    df.groupby('YearMonth', as_index=False)['Sales']
    .sum()
    .sort_values(by='YearMonth')
)
monthly.tail(10)

In [None]:
# Region profit
# Sum Profit per Region (kept as a regular column), largest total first.
region_profit = (
    df.groupby('Region', as_index=False)['Profit']
    .sum()
    .sort_values(by='Profit', ascending=False)
)
region_profit

## Notes / Insights
- Summarize the top 3 drivers of revenue.
- Identify where discounts hurt margins.
- Recommend pricing or promotion changes.
