# Exploratory Data Analysis

This notebook performs exploratory data analysis (EDA) on the e-commerce dataset. It covers loading the raw data, checking for missing values, and visualizing the sales distribution and the correlation matrix.

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling: apply seaborn's white-grid theme to every figure below.
# `sns.set` is only an alias of `sns.set_theme`, which is the preferred name.
sns.set_theme(style='whitegrid')

# Location of the raw CSV (relative paths resolve against the kernel's
# working directory). Kept in one constant so it is changed in one place.
RAW_DATA_PATH = '../data/raw/ecommerce_data.csv'  # Update with the actual path to your raw data

# Load the dataset
data = pd.read_csv(RAW_DATA_PATH)

# Display the first few rows of the dataset
data.head()

In [None]:
# Count the missing entries in each column ...
missing_values = data.isna().sum()
# ... and show only the columns that have at least one.
missing_values.loc[missing_values.gt(0)]

In [None]:
# Histogram (30 bins) of the 'sales' column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data['sales'], bins=30, kde=True, ax=ax)
ax.set_title('Distribution of Sales')
ax.set_xlabel('Sales')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Correlation matrix of the numeric columns, drawn as an annotated heatmap.
# `numeric_only=True` is required: since pandas 2.0 a bare `data.corr()`
# raises if the frame contains any non-numeric column (e.g. strings),
# instead of silently skipping those columns.
correlation_matrix = data.corr(numeric_only=True)

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()