# Superstore Data Analysis
## Introduction
This notebook presents a complete analysis of the Superstore dataset, covering data cleaning, exploratory data analysis (EDA), and the resulting insights.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

sns.set(style='whitegrid')
%matplotlib inline

## 1. Data Loading and Inspection

In [None]:
# Prerequisite: 'Sample - Superstore.csv' must sit in the same directory as
# this notebook, since the path below is relative.
# The file is decoded as latin1 (as requested here) instead of pandas'
# default UTF-8.
df = pd.read_csv(
    'Sample - Superstore.csv',
    encoding='latin1',
)

# Preview the first five rows as a quick sanity check of the load.
df.head(n=5)

## 2. Data Cleaning

In [None]:
# Parse both date columns into pandas datetime values.
# No explicit format is passed, so pandas infers it from the data.
for date_col in ('Order Date', 'Ship Date'):
    df[date_col] = pd.to_datetime(df[date_col])

# Report how many rows exactly repeat an earlier row, then drop them
# (drop_duplicates keeps the first occurrence by default).
print(f'Duplicates: {df.duplicated().sum()}')
df = df.drop_duplicates()

# Print the number of missing values in each column.
print(df.isna().sum())

## 3. Exploratory Data Analysis

In [None]:
# Histogram of the 'Sales' column: 100 bins on a log-scaled axis, with a
# kernel density estimate drawn on top.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Sales'], bins=100, log_scale=True, kde=True, ax=ax)
ax.set_title('Distribution of Sales (Log Scale)')
plt.show()

In [None]:
# Total profit per product category.
#
# The totals are computed with pandas first and the pre-aggregated frame is
# plotted, rather than asking seaborn to sum inside barplot with `ci=None`.
# The `ci` argument is deprecated since seaborn 0.12 in favour of `errorbar`,
# so this form runs without a deprecation warning on older and newer seaborn
# releases alike. Each category contributes a single value, so no error bars
# are drawn.
#
# sort=False keeps the categories in order of first appearance in `df`,
# which is the bar order seaborn uses for a plain string column.
profit_by_category = df.groupby('Category', as_index=False, sort=False)['Profit'].sum()

plt.figure(figsize=(10, 6))
sns.barplot(x='Category', y='Profit', data=profit_by_category)
plt.title('Total Profit by Category')
plt.show()

In [None]:
# Correlation matrix of the four key numeric measures.
plt.figure(figsize=(8, 6))
# Restrict to numeric columns; selecting the four measures below raises a
# KeyError if any of them was not read as a numeric dtype.
numeric_df = df.select_dtypes(include=[np.number])
correlations = numeric_df[['Sales', 'Quantity', 'Discount', 'Profit']].corr()
# Pin the colour scale to the full correlation range [-1, 1]. 'coolwarm' is a
# diverging colormap, so a symmetric range makes its neutral midpoint mean
# "no correlation"; without it the scale spans only the observed min/max and
# the colours can misrepresent sign and strength.
sns.heatmap(correlations, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.show()

## 4. Conclusion
See the attached report for full insights.