# 📦 E-Commerce Data Analytics Project
This notebook builds a full e-commerce analytics pipeline in Python, in a Google Colab environment: it simulates synthetic order data, processes it, and visualizes the results.

**Author:** Lahitha Motupalli  
**Generated on:** June 04, 2025

---

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Seed NumPy's global RNG so every np.random draw that follows is reproducible
# from run to run.
np.random.seed(42)

## 🔧 Generate Synthetic E-Commerce Dataset

In [None]:
# Build a synthetic order table with one row per order.
# NOTE: the random draws below are made in a fixed sequence (customer, date,
# region, category, amount, returned); reordering them changes the values
# produced from NumPy's global RNG for a given seed.
n_orders = 10000

# Sequential order IDs 1..n_orders.
order_ids = np.arange(1, n_orders + 1)

# Customer IDs are integers in [1000, 2000), so customers repeat across orders.
customer_ids = np.random.randint(1000, 2000, size=n_orders)

# Order dates are sampled with replacement from every day of 2022-2023.
calendar_days = pd.date_range(start='2022-01-01', end='2023-12-31')
order_dates = pd.to_datetime(np.random.choice(calendar_days, n_orders))

regions = np.random.choice(['North', 'South', 'East', 'West'], size=n_orders)
categories = np.random.choice(['Electronics', 'Clothing', 'Home', 'Books'], size=n_orders)

# Exponentially distributed amounts (scale 120.0), rounded to 2 decimals.
order_amounts = np.round(np.random.exponential(scale=120.0, size=n_orders), 2)

# Return flag: 1 with probability 0.1, otherwise 0.
returned_flags = np.random.choice([0, 1], size=n_orders, p=[0.9, 0.1])

data = pd.DataFrame({
    'OrderID': order_ids,
    'CustomerID': customer_ids,
    'OrderDate': order_dates,
    'Region': regions,
    'ProductCategory': categories,
    'OrderAmount': order_amounts,
    'Returned': returned_flags,
})

# Calendar month of each order as a monthly Period, used as a grouping key.
data['Month'] = data['OrderDate'].dt.to_period('M')
data.head()

## 📈 Analyze the Data

In [None]:
# Monthly revenue by region: sum OrderAmount per (Month, Region), then pivot
# the inner level (Region) into columns. Month/region pairs with no orders
# come out as NaN after the pivot and are replaced with 0.
revenue_by_month_region = data.groupby(['Month', 'Region'])['OrderAmount'].sum()
monthly_revenue = revenue_by_month_region.unstack().fillna(0)

# Both per-category metrics share the same grouping.
by_category = data.groupby('ProductCategory')

# Average order amount per category, sorted ascending.
avg_order_by_category = by_category['OrderAmount'].mean().sort_values()

# Return rate per category: the mean of the Returned column within each group.
return_rate = by_category['Returned'].mean()

## 📊 Visualize Insights

In [None]:
# Line chart of monthly_revenue: one line per column, plotted against the index.
ax = monthly_revenue.plot(figsize=(12, 6), marker='o')
ax.set_title('Monthly Revenue by Region')
ax.set_ylabel('Revenue ($)')
ax.set_xlabel('Month')
# Rotate the x tick labels by 45 degrees.
plt.xticks(rotation=45)
ax.figure.tight_layout()
ax.grid(True)
plt.show()

In [None]:
# Bar chart of avg_order_by_category: index on the x axis, values as bar heights.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=avg_order_by_category.index,
    y=avg_order_by_category.values,
    ax=ax,
)
ax.set_title('Average Order Amount by Product Category')
ax.set_ylabel('Average Amount ($)')
ax.set_xlabel('Product Category')
# Horizontal gridlines only.
ax.grid(axis='y')
plt.show()

In [None]:
# Bar chart of return_rate: index on the x axis, values as bar heights.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=return_rate.index,
    y=return_rate.values,
    ax=ax,
)
ax.set_title('Return Rate by Product Category')
ax.set_ylabel('Return Rate')
ax.set_xlabel('Product Category')
# Horizontal gridlines only.
ax.grid(axis='y')
plt.show()