# E-commerce Sales Performance Analysis

This notebook provides a comprehensive analysis of e-commerce sales data, including:
- Data loading and exploration
- Data cleaning and preprocessing
- Exploratory Data Analysis (EDA)
- Data visualization
- Business insights and recommendations

---

## 1. Import Libraries

In [None]:
# Core analysis / plotting stack
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE: this silences *every* warning for the rest of the session
# (including deprecation notices); narrow the filter if warnings need
# to be seen while debugging.
warnings.filterwarnings('ignore')

# Set visualization style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

# Import custom modules
import sys
import os

# The project modules are looked up in "<parent of the working directory>/src".
# Add that directory only if it is not already present, so re-running this
# cell does not keep appending duplicate entries to sys.path.
src_dir = os.path.join(os.path.dirname(os.getcwd()), 'src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

from data_loader import load_sales_data, get_data_info
from data_cleaner import clean_sales_data
from analyzer import (
    calculate_kpis, print_kpis, analyze_by_category, analyze_by_region,
    analyze_by_time, analyze_top_products, analyze_customer_demographics,
    analyze_payment_methods
)
from visualizer import (
    plot_sales_trend, plot_category_analysis, plot_region_analysis,
    plot_top_products, plot_customer_demographics, plot_payment_methods,
    plot_heatmap_correlation
)

print("[OK] All libraries imported successfully")

## 2. Load Data

In [None]:
# Read the raw sales CSV; the path is relative to the notebook's working directory
raw_csv_path = '../data/raw/sales_data.csv'
df = load_sales_data(raw_csv_path)
print(f"Data loaded successfully! Shape: {df.shape}")

## 3. Data Overview

In [None]:
# Display basic information about the raw (not yet cleaned) data in `df`.
# get_data_info is imported from the project's data_loader module; exactly
# what it prints or returns is not visible from this notebook.
get_data_info(df)

In [None]:
# Show the first 10 rows of the raw data; as the cell's trailing expression,
# the result is rendered as the cell output.
df.head(10)

## 4. Data Cleaning

In [None]:
# Run the project's cleaning routine on the raw frame, then report the result's shape
df_clean = clean_sales_data(df)
cleaned_shape = df_clean.shape
print(f"\nCleaned data shape: {cleaned_shape}")

## 5. Key Performance Indicators (KPIs)

In [None]:
# Calculate KPIs from the cleaned data and display them.
# `kpis` is reused by the Key Insights cell, which reads the keys
# 'Average_Order_Value', 'Unique_Customers' and 'Discount_Rate' from it.
kpis = calculate_kpis(df_clean)
print_kpis(kpis)

## 6. Exploratory Data Analysis

### 6.1 Category Analysis

In [None]:
# Per-category breakdown of the cleaned data, printed under a heading
category_analysis = analyze_by_category(df_clean)
print("Category Performance:", category_analysis, sep="\n")

In [None]:
# Visualize category performance from the cleaned data.
# save=False is passed to the project's plotting helper — presumably this
# skips writing the figure to disk; confirm in the visualizer module.
plot_category_analysis(df_clean, save=False)

### 6.2 Regional Analysis

In [None]:
# Per-region breakdown of the cleaned data, printed under a heading
region_analysis = analyze_by_region(df_clean)
print("Regional Performance:", region_analysis, sep="\n")

In [None]:
# Visualize regional performance.
# One call is enough: the inputs (df_clean, save=False) do not change
# between cells, so a second identical call adds nothing.
plot_region_analysis(df_clean, save=False)

### 6.3 Time-Based Analysis

In [None]:
# Sales trends over time: the result is indexed by period name below
time_analysis = analyze_by_time(df_clean)

# (heading, key) pairs, printed in this order; the leading "\n" on the later
# headings leaves a blank line between sections
period_sections = (
    ("Monthly Sales:", 'monthly'),
    ("\nQuarterly Sales:", 'quarterly'),
    ("\nYearly Sales:", 'yearly'),
)
for section_heading, period_key in period_sections:
    print(section_heading)
    print(time_analysis[period_key])

In [None]:
# Visualize the sales trend from the cleaned data.
# save=False is passed to the project's plotting helper — presumably this
# skips writing the figure to disk; confirm in the visualizer module.
plot_sales_trend(df_clean, save=False)

### 6.4 Top Products Analysis

In [None]:
# Top-15 product table (top_n=15 matches the count named in the heading)
top_products = analyze_top_products(df_clean, top_n=15)
print("Top 15 Products by Revenue:", top_products, sep="\n")

In [None]:
# Visualize the top products; top_n=15 matches the value used for the
# `top_products` table in the analysis cell.
# save=False — presumably skips writing the figure to disk; confirm in the
# visualizer module.
plot_top_products(df_clean, top_n=15, save=False)

### 6.5 Customer Demographics

In [None]:
# Customer demographics summary of the cleaned data, printed under a heading
demo_analysis = analyze_customer_demographics(df_clean)
print("Customer Demographics:", demo_analysis, sep="\n")

In [None]:
# Visualize customer demographics from the cleaned data.
# save=False is passed to the project's plotting helper — presumably this
# skips writing the figure to disk; confirm in the visualizer module.
plot_customer_demographics(df_clean, save=False)

### 6.6 Payment Method Analysis

In [None]:
# Payment-method usage summary of the cleaned data, printed under a heading
payment_analysis = analyze_payment_methods(df_clean)
print("Payment Method Usage:", payment_analysis, sep="\n")

In [None]:
# Visualize payment-method usage from the cleaned data.
# save=False is passed to the project's plotting helper — presumably this
# skips writing the figure to disk; confirm in the visualizer module.
plot_payment_methods(df_clean, save=False)

### 6.7 Correlation Analysis

In [None]:
# Visualize correlations as a heatmap built from the cleaned data.
# Which columns take part is decided inside the project's visualizer helper.
# save=False — presumably skips writing the figure to disk; confirm there.
plot_heatmap_correlation(df_clean, save=False)

## 7. Key Insights Summary

In [None]:
# Print a headline summary built from the tables and KPIs computed in earlier cells
banner = "=" * 80
print(banner)
print("KEY INSIGHTS")
print(banner)

# The first row of each table is reported as the "top" entry
# (assumes the analysis helpers return rows ranked best-first — TODO confirm).
lead_category = category_analysis.iloc[0]
print(f"\n1. Top Category: {category_analysis.index[0]}")
print(f"   Revenue: ${lead_category['Total_Revenue']:,.2f}")
print(f"   Share: {lead_category['Revenue_Share_%']}%")

lead_region = region_analysis.iloc[0]
print(f"\n2. Top Region: {region_analysis.index[0]}")
print(f"   Revenue: ${lead_region['Total_Revenue']:,.2f}")
print(f"   Share: {lead_region['Revenue_Share_%']}%")

# index[0][0] takes the first component of the first index entry
# (presumably a multi-level index whose first level is the product — verify).
lead_product = top_products.iloc[0]
print(f"\n3. Top Product: {top_products.index[0][0]}")
print(f"   Revenue: ${lead_product['Total_Revenue']:,.2f}")
print(f"   Orders: {int(lead_product['Order_Count'])}")

# Remaining figures come straight from the KPI mapping
print(f"\n4. Average Order Value: ${kpis['Average_Order_Value']:,.2f}")
print(f"5. Total Customers: {kpis['Unique_Customers']:,}")
print(f"6. Discount Rate: {kpis['Discount_Rate']:.2f}%")