# 01 - Exploratory Data Analysis

## Objective
Understand the distribution, trends, and relationships in the dataset.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the cleaned orders export and convert 'order_date' to datetime in the
# same chain, so `df` is bound exactly once with its final dtypes.
df = (
    pd.read_csv('../data/cleaned/cleaned_orders.csv')
    .assign(order_date=lambda frame: pd.to_datetime(frame['order_date']))
)

# Preview the first rows as the cell output.
df.head()

## 1. Dataset Overview

In [None]:
# Print the frame's schema summary: column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Descriptive statistics for the columns describe() includes by default; in a
# mixed-dtype frame object/string columns are omitted unless `include=` is passed.
df.describe()

## 2. Missing Values and Duplicates

In [None]:
# Number of missing entries in each column (isna is the same method as isnull).
df.isna().sum()

In [None]:
# Count rows that exactly repeat an earlier row across all columns;
# the first occurrence of each row is not counted as a duplicate.
df.duplicated(keep='first').sum()

## 3. Sales Distribution

In [None]:
# Histogram of order sales with a KDE overlay, drawn on an explicit Axes
# rather than through the implicit pyplot "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['sales'], bins=50, kde=True, ax=ax)
ax.set_title('Sales Distribution')
ax.set_xlabel('Sales')
ax.set_ylabel('Frequency')
plt.show()

## 4. Monthly Sales Trend

In [None]:
# Total sales per month.
# Group on the precomputed 'order_month' column when the cleaned CSV provides
# it (same result as before); otherwise derive the month from the parsed
# 'order_date' so the cell does not stop with a KeyError on a fresh run
# against a CSV that lacks that column.
# NOTE(review): the format of 'order_month' is not visible in this notebook.
# If it is a bare month number (1-12) the totals are pooled across years;
# confirm against the cleaning step.
if 'order_month' in df.columns:
    month_key = df['order_month']
else:
    # Year-month periods sort chronologically and plot on a proper time axis.
    month_key = df['order_date'].dt.to_period('M').rename('order_month')

monthly = df.groupby(month_key)['sales'].sum()

# Series.plot returns the Axes it drew on; label that Axes directly.
ax = monthly.plot(figsize=(10, 5), marker='o', title='Monthly Sales')
ax.set_ylabel('Total Sales')
plt.show()

## 5. Top 10 Products by Sales

In [None]:
# Rank products by their summed sales and show the ten largest totals.
(
    df.groupby('product_name')['sales']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)