## E-Commerce Sales Analysis

## 1. Load Libraries
Import the libraries used throughout the notebook: pandas, numpy, matplotlib, and seaborn.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' style globally so every later figure picks it up.
sns.set(style='whitegrid')
# IPython magic (not plain Python): render matplotlib figures inline in the cell output.
%matplotlib inline

## 2. Load Data
Load the CSV file into a DataFrame. The cell below currently calls `pd.read_csv` directly rather than the `load_data` function from `src/data_loader.py`.

In [None]:
# Location of the raw dataset, relative to this notebook's directory.
# Previously `file_path` named a different file and was never used, while the
# read call hard-coded its own path; the path that was actually read is kept.
file_path = "../Dataset/store-dataset.csv"

# latin1 decoding is carried over from the original read call.
# NOTE(review): presumably the file contains non-UTF-8 bytes — confirm.
df = pd.read_csv(file_path, encoding='latin1')

# Preview the first rows as the cell's rich output.
df.head()

## 3. Clean Data
Clean the dataset using the `clean_data` function:
- Remove duplicates
- Fill missing values
- Convert dates to datetime format

In [None]:
# NOTE(review): `clean_data` is neither defined nor imported in the visible
# notebook, so on a fresh kernel this cell raises NameError unless the function
# is brought into scope elsewhere — confirm its module and import it in the
# imports cell.
# `df` is rebound to the cleaned result, so the raw frame is no longer
# reachable after this cell runs.
df = clean_data(df)
# Print the `.info()` summary of the cleaned result.
df.info()

## 4. Data Exploration
Check the dataset's size and the number of missing values per column (the first rows and data types are shown by `df.head()` and `df.info()` above).

In [None]:
# Report the frame's dimensions, then the per-column count of missing values.
print('Number of rows:', df.shape[0])
print('Number of columns:', df.shape[1])
# The label's line break is written as an escape sequence; a raw newline
# inside a single-quoted string is a SyntaxError.
print('Missing values per column:\n', df.isnull().sum())

## 5. Sales Summary
Calculate total and average sales, top products, top categories and sub-categories

In [None]:
# NOTE(review): `total_sales`, `average_sales`, `top_selling_products` and
# `top_categories` are neither defined nor imported in the visible notebook —
# confirm which module provides them and import it in the imports cell.

# Headline figures, formatted as currency with thousands separators.
total = total_sales(df)
average = average_sales(df)
print(f'Total Sales: ${total:,.2f}')
print(f'Average Sales per Order: ${average:,.2f}')

# Line breaks in the labels below are written as "\n" escapes; a raw newline
# inside a single-quoted string is a SyntaxError.
top_products = top_selling_products(df, top_n=10)
print('Top 10 Products:\n', top_products)

# `top_categories` returns a pair: (category result, sub-category result).
categories, sub_categories = top_categories(df)
print('Top Categories:\n', categories)
print('Top Sub-Categories:\n', sub_categories)

In [None]:
# Bar chart of `top_products` (computed in the sales-summary cell).
plt.figure(figsize=(10, 6))
top_products.plot.bar(color='skyblue')

# Axis decoration: tilt the tick labels, then label the axis and the figure.
plt.xticks(rotation=45)
plt.ylabel('Sales')
plt.title('Top 10 Selling Products')
plt.show()

## 6. Trend Analysis
Monthly sales trends and state-wise sales analysis

In [None]:
# `monthly_sales` is a project helper that is not defined in this notebook.
# NOTE(review): presumably it returns sales indexed by month — confirm.
monthly = monthly_sales(df)

# Line chart with a marker at every data point.
plt.figure(figsize=(10, 6))
monthly.plot.line(marker='o', color='green')

plt.xticks(rotation=45)
plt.ylabel('Sales')
plt.title('Monthly Sales Trends')
plt.show()

In [None]:
# Sum the 'Sales' column within each 'State', ordered from largest to smallest.
state_sales = (
    df.groupby('State')['Sales']
    .sum()
    .sort_values(ascending=False)
)

# One bar per state, in the descending order established above.
plt.figure(figsize=(12, 6))
state_sales.plot.bar(color='orange')

plt.xticks(rotation=45)
plt.ylabel('Sales')
plt.title('Sales by State')
plt.show()

## 7. Insights
Summarize key insights at the end of the notebook

In [None]:
print('INSIGHTS:')

# Each entry pairs a fixed label with the leading index entries of a result
# computed in an earlier cell.
# NOTE(review): the "Most profitable" label relies on how `top_categories`
# ranks sub-categories, which is not visible here — confirm it ranks by profit.
insight_lines = [
    ('- Best states to sell in: ', state_sales.index[:3].tolist()),
    ('- Most profitable sub-categories: ', sub_categories.index[:3].tolist()),
    ('- Top-selling products: ', top_products.index[:3].tolist()),
    # Months are re-sorted by value so the two highest come first.
    ('- Best months to sell: ', monthly.sort_values(ascending=False).index[:2].tolist()),
]

for label, values in insight_lines:
    print(label, values)