# Comprehensive EDA for E-Commerce Dataset
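This notebook performs an exploratory data analysis (EDA) of an e-commerce orders dataset. It covers data cleaning, date processing, financial analysis, customer insights, logistics, product demand, and returns/cancellations, and closes with recommendations.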

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide plotting defaults: apply seaborn's "whitegrid" style first,
# then set the default matplotlib figure size to (10, 5).
sns.set(style="whitegrid")
plt.rcParams.update({'figure.figsize': (10, 5)})


## Load Dataset

In [None]:
# Load the gzip-compressed CSV into the working DataFrame `df`.
# NOTE(review): the absolute path ties the notebook to one machine/sandbox —
# consider moving it to a configurable constant.
source_path = '/mnt/data/compressed_file.csv.gz'
df = pd.read_csv(source_path, compression='gzip')

# Preview the first five rows as the cell output.
df.head(5)

## Basic Info

In [None]:
# Structural overview of the frame (columns, non-null counts, dtypes), taken
# before any of the cleaning steps below are applied.
df.info()

In [None]:
# Summary statistics for every column; include='all' adds the non-numeric
# columns to the default numeric-only summary.
df.describe(include='all')

## Data Cleaning

In [None]:
# Impute missing order amounts with the column median.
# NOTE(review): the imputed values flow into every revenue total computed
# below — confirm that rows with a missing amount should count as revenue
# rather than be excluded.
amount_median = df['Amount'].median()
df['Amount'] = df['Amount'].fillna(amount_median)

In [None]:
# Replace missing values in every object-dtype column with the placeholder
# 'Unknown'.
# NOTE(review): if 'Date' is still stored as text at this point it is filled
# too; the later to_datetime(..., errors='coerce') turns 'Unknown' into NaT.
cat_cols = df.select_dtypes(include='object').columns
for column in cat_cols:
    df[column] = df[column].fillna(value='Unknown')

## Date Processing

In [None]:
# Parse 'Date' as datetimes; values that cannot be parsed become NaT
# (errors='coerce') instead of raising.
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Derive the calendar components as separate columns, in Year/Month/Day order.
date_parts = df['Date'].dt
df['Year'], df['Month'], df['Day'] = date_parts.year, date_parts.month, date_parts.day

## Financial Analysis

In [None]:
# Monthly revenue trend.
# Group by calendar month (year + month) rather than by the month number
# alone, so that the same month from different years is not merged into a
# single point. Rows whose 'Date' is NaT are left out by groupby, as rows with
# a missing 'Month' were before.
revenue = df.groupby(df['Date'].dt.to_period('M'))['Amount'].sum()

# Plot against real timestamps so the x axis is an ordered, labelled time axis.
fig, ax = plt.subplots()
ax.plot(revenue.index.to_timestamp(), revenue.values, marker='o')
ax.set(title='Revenue by Month', xlabel='Month', ylabel='Revenue')
fig.autofmt_xdate()  # rotate the date tick labels so they do not overlap
plt.show()

In [None]:
# Total 'Amount' per category, keeping the ten largest totals.
category_revenue = df.groupby('Category')['Amount'].sum()
top_cat = category_revenue.sort_values(ascending=False).head(10)

# Horizontal bars: total amount on x, category on y.
ax = sns.barplot(x=top_cat.values, y=top_cat.index)
ax.set_title('Top Categories')
plt.show()

In [None]:
# Average order value: sum 'Amount' within each 'Order ID', then take the
# mean of those per-order totals.
order_totals = df.groupby('Order ID')['Amount'].sum()
AOV = order_totals.mean()
AOV

## Customer Insights

In [None]:
# Total 'Amount' per shipping state, keeping the ten largest totals.
revenue_by_state = df.groupby('ship-state')['Amount'].sum()
state_sales = revenue_by_state.sort_values(ascending=False).head(10)

# Horizontal bars: total amount on x, state on y.
ax = sns.barplot(x=state_sales.values, y=state_sales.index)
ax.set_title('Top States')
plt.show()

In [None]:
# Number of rows for each value of 'Status', drawn as horizontal bars.
status = df['Status']
ax = sns.countplot(y=status)
ax.set_title('Order Status')
plt.show()

In [None]:
# Total 'Amount' for each value of the 'B2B' flag; the index is cast to str
# so the flag values are used as text labels on the x axis.
b2b = df.groupby('B2B')['Amount'].sum()
segment_labels = b2b.index.astype(str)
ax = sns.barplot(x=segment_labels, y=b2b.values)
ax.set_title('B2B vs B2C')
plt.show()

## Logistics Analysis

In [None]:
# Number of rows for each 'ship-service-level' value, drawn as horizontal bars.
service_level = df['ship-service-level']
ax = sns.countplot(y=service_level)
ax.set_title('Shipping Levels')
plt.show()

In [None]:
# Total 'Amount' per 'Fulfillment' value, largest first.
fulfillment_revenue = df.groupby('Fulfillment')['Amount'].sum()
ful = fulfillment_revenue.sort_values(ascending=False)

# Horizontal bars: total amount on x, fulfillment value on y.
ax = sns.barplot(x=ful.values, y=ful.index)
ax.set_title('Fulfillment Revenue')
plt.show()

## Product & Inventory

In [None]:
# Total 'Quantity' per category, keeping the ten largest totals.
units_by_category = df.groupby('Category')['Quantity'].sum()
qty = units_by_category.sort_values(ascending=False).head(10)

# Horizontal bars: total quantity on x, category on y.
ax = sns.barplot(x=qty.values, y=qty.index)
ax.set_title('Demand by Category')
plt.show()

## Returns & Cancellations

In [None]:
# Percentage of all rows whose 'Status' is exactly 'Cancelled'.
cancelled_rows = int((df['Status'] == 'Cancelled').sum())
cancel_rate = cancelled_rows / len(df) * 100
cancel_rate

In [None]:
# Rows whose 'Status' contains "Return" (case-insensitive; a missing Status
# counts as no match), followed by a count of those rows per category.
is_return = df['Status'].str.contains('Return', case=False, na=False)
returns = df[is_return]
ax = sns.countplot(y=returns['Category'])
ax.set_title('Returns by Category')
plt.show()

## Recommendations & Conclusion
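- Increase focus on high-revenue categories (see the "Top Categories" chart).
- Improve quality in high-return categories (see the "Returns by Category" chart).
- Prioritize Amazon fulfillment to reduce delays. Note: this notebook compares fulfillment options by revenue only ("Fulfillment Revenue") and does not measure delivery times, so the effect on delays still needs to be verified.
- Target marketing in top-performing states (see the "Top States" chart).
- Optimize low-performing inventory categories. Note: the "Demand by Category" chart shows only the ten highest-quantity categories, so low performers have to be identified from the bottom of the full ranking.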