# 🛒 Superstore Dataset - Exploratory Data Analysis (EDA)
**Objective:** Extract meaningful insights through visual and statistical exploration.

**Tools:** Python, Pandas, Matplotlib, Seaborn

In [None]:
# Standard library
import warnings

# Third-party: data handling and plotting
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Apply seaborn's white-grid style to the figures drawn below.
sns.set(style='whitegrid')
# Suppress every warning category from this point on.
warnings.filterwarnings('ignore')

In [None]:
# Read the Superstore CSV into a DataFrame and preview its first rows.
# NOTE(review): the doubled ".csv.csv" extension looks accidental — confirm
# it matches the file on disk before changing it.
csv_path = 'superstore.csv.csv'
df = pd.read_csv(csv_path)
df.head()

## 🔍 Initial Exploration

In [None]:
# Structural overview: column dtypes and non-null counts.
# info() writes its report itself and returns None, so it needs no wrapper.
df.info()

# A notebook cell only echoes its final expression (and a plain script echoes
# nothing), so each intermediate result is printed explicitly; otherwise the
# summary statistics and the missing-value counts would be silently discarded.
print(df.describe())
print(df.isnull().sum())      # missing values per column
print(df.duplicated().sum())  # number of fully duplicated rows

## 🧹 Data Cleaning

In [None]:
# Convert both date columns to datetime so date-based operations (such as the
# .dt accessor) can be used on them.
for date_col in ('Order Date', 'Ship Date'):
    df[date_col] = pd.to_datetime(df[date_col])

# Drop 'Row ID' (presumably just a row counter — confirm against the data).
# errors='ignore' makes the cell safe to re-run: without it a second run
# raises KeyError because the column has already been removed.
df.drop(columns=['Row ID'], inplace=True, errors='ignore')
df.head()

## 📊 Univariate Analysis (Histograms & Boxplots)

In [None]:
# Numeric columns to examine one at a time.
numerics = ['Sales', 'Profit', 'Quantity', 'Discount']

for feature in numerics:
    # One figure per column: histogram with KDE on the left, boxplot on the right.
    fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 4))

    sns.histplot(df[feature], kde=True, ax=hist_ax)
    hist_ax.set_title(f'Histogram of {feature}')

    sns.boxplot(x=df[feature], ax=box_ax)
    box_ax.set_title(f'Boxplot of {feature}')

    plt.show()

## 📦 Categorical Analysis

In [None]:
# Categorical columns to summarise.
categorical = ['Segment', 'Region', 'Category', 'Sub-Category', 'Ship Mode']

for field in categorical:
    values = df[field]
    # Frequency table as text first, then the counts as horizontal bars.
    print(values.value_counts())
    ax = sns.countplot(y=values)
    ax.set_title(f'Count of {field}')
    plt.show()

## 🔗 Bivariate Analysis

In [None]:
# Pairwise plots for every combination of the numeric columns.
numeric_df = df[numerics]
sns.pairplot(numeric_df)
plt.show()

# Correlation matrix of the same columns, with each coefficient annotated.
corr_matrix = numeric_df.corr()
sns.heatmap(corr_matrix, cmap='coolwarm', annot=True)
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Discount on the x-axis against Profit on the y-axis, one point per row.
ax = sns.scatterplot(data=df, x='Discount', y='Profit')
ax.set_title('Discount vs Profit')
plt.show()

In [None]:
# Mean Sales and mean Profit for each Category, drawn as a bar chart.
category_means = df.groupby('Category')[['Sales', 'Profit']].mean()
ax = category_means.plot(kind='bar')
ax.set_title('Avg Sales and Profit per Category')
plt.show()

## 🕒 Time Series Analysis

In [None]:
# Tag every row with the calendar month of its order date, then total Sales
# for each month.
df['Month'] = df['Order Date'].dt.to_period('M')
monthly_sales = df.groupby('Month')['Sales'].sum()

# Line chart of the monthly totals.
ax = monthly_sales.plot(kind='line', figsize=(12, 5))
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Sales')
plt.show()

## 🧾 Summary of Insights
- The most profitable categories are identified from average sales and profit per category; regions are compared by record count
- High discounts negatively affect profit
- Sales trends show seasonal patterns
- Outliers are present in Sales and Profit distributions
- Category-wise breakdown helps target strategies