# Import Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go


# Load Dataset

In [None]:
# Read the Superstore Excel workbook into the DataFrame used by every later cell.
# NOTE(review): this is an absolute, machine-specific Windows path — the notebook
# only runs where this exact file exists; confirm or make the location configurable.
df=pd.read_excel("C:\\Users\\Swapnil\\OneDrive\\Desktop\\EDA_PROJECT\\Super store2.xlsx")

In [None]:
# Show the first two rows as a quick look at the loaded data.
df.head(2)

# Basic Overview

In [None]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

In [None]:
# Data type of each column.
df.dtypes

In [None]:
# Column labels of the DataFrame.
df.columns

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Descriptive statistics (count, mean, spread, quartiles) for the columns
# pandas selects by default.
df.describe()

# Data Cleaning

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Remove every row that has at least one missing value, modifying df in place.
df.dropna(inplace=True)


In [None]:
# Re-check the per-column missing-value counts after the rows were dropped.
df.isnull().sum()

In [None]:
# Number of rows that exactly repeat an earlier row (counted only; nothing is removed here).
df.duplicated().sum()

# Extract the year from 'Order Date' into a new 'Year' column. This lets us analyze the data on a yearly basis.

In [None]:
# Pull the calendar year out of each order date and store it as text,
# so 'Year' is available as a string column for the yearly groupings.
order_year = df['Order Date'].dt.year
df['Year'] = order_year.astype(str)
df['Year']

In [None]:
# Yearly aggregates that feed the trend charts. Each frame has a 'Year' column
# plus one metric column named after the source column.
data = df.groupby("Year")["Sales"].sum().reset_index()     # total sales per year
data1 = df.groupby("Year")["Profit"].sum().reset_index()   # total profit per year
# Count DISTINCT order IDs rather than rows: if the data holds one row per
# product line (as the standard Superstore layout does — confirm), the same
# Order ID repeats and a plain row count would overstate the number of orders.
data2 = df.groupby("Year")["Order ID"].nunique().reset_index()



# Create subplots with 1 row and 3 columns

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# One row with three side-by-side panels: yearly sales, profit and order counts.
# Each entry is (panel title, yearly frame, metric column in that frame).
trend_panels = [
    ('Sales Trend', data, 'Sales'),
    ('Profit Trend', data1, 'Profit'),
    ('Order Trend', data2, 'Order ID'),
]

fig = make_subplots(
    rows=1,
    cols=len(trend_panels),
    subplot_titles=tuple(panel[0] for panel in trend_panels),
)

# Add one line-with-markers trace per panel, left to right.
for panel_col, (panel_title, yearly_frame, metric) in enumerate(trend_panels, start=1):
    trend_line = go.Scatter(
        x=yearly_frame['Year'],
        y=yearly_frame[metric],
        mode='lines+markers',
        name=metric,
    )
    fig.add_trace(trend_line, row=1, col=panel_col)

fig.update_layout(title_text='📊 Yearly Trends', width=1200, height=400)
fig.show()


# Ship Mode Distribution — Orders per shipping method


In [None]:
# Tally how many rows use each shipping method and return it as a
# two-column table: 'Ship Mode' and 'Count'.
ship_mode_data = (
    df['Ship Mode']
    .value_counts()
    .rename_axis('Ship Mode')
    .reset_index(name='Count')
)
ship_mode_data

In [None]:
# Bar chart of the per-ship-mode counts; each bar gets its own colour and
# shows its count as a text label.
fig = px.bar(ship_mode_data, x='Ship Mode', y='Count', text='Count',
             color='Ship Mode',
             color_discrete_sequence=px.colors.qualitative.Vivid,
             title='🚚 Distribution of Ship Modes')
# Last expression of the cell: update_layout returns the figure, so the notebook renders it.
fig.update_layout(template='plotly_white', width=600, height=400)

#  Segment Distribution — Share of each customer segment

In [None]:
# Donut chart (hole=0.4) of how the rows split across customer segments.
fig = px.pie(df, names='Segment', hole=0.4,
             color_discrete_sequence=px.colors.sequential.RdBu,
             title='👥 Customer Segment Distribution')

# Last expression of the cell: returns the figure so the notebook renders it.
fig.update_layout(template='plotly_white', width=600, height=400)

# Treemap — Sales distribution by Category and Sub-Category



In [None]:
# Treemap sized by Sales: Category at the top level, Sub-Category nested inside,
# coloured by Category.
fig = px.treemap(df, path=['Category', 'Sub-Category'], values='Sales',
                 color='Category',
                 color_discrete_sequence=px.colors.qualitative.Set3,
                 title='🧩 Sales by Category and Sub-Category')
# Last expression of the cell: returns the figure so the notebook renders it.
fig.update_layout(template='plotly_white', width=700, height=500)


# Sales Histogram — Distribution of Sales values  
 

In [None]:
# Histogram of the Sales column (nbins=50, single blue colour).
fig_sales = px.histogram(df, x='Sales', nbins=50,
                         color_discrete_sequence=['#636EFA'],
                         title='💰 Sales Distribution')
# Last expression of the cell: returns the figure so the notebook renders it.
fig_sales.update_layout(template='plotly_white', width=600, height=400)

# Profit Histogram — Distribution of Profit values 

In [None]:
# Histogram of the Profit column (nbins=50, single red colour).
fig_profit = px.histogram(df, x='Profit', nbins=50,
                          color_discrete_sequence=['#EF553B'],
                          title='💹 Profit Distribution')
# Last expression of the cell: returns the figure so the notebook renders it.
fig_profit.update_layout(template='plotly_white', width=600, height=400)

# Profit by Region & Segment

In [None]:
# Sunburst with Region as the inner ring and Segment as the outer ring;
# Profit drives both the sector size and the colour.
# NOTE(review): row-level Profit may be negative; sector sizes use the summed
# Profit per Region/Segment — confirm every group total is positive.
fig = px.sunburst(df, path=['Region', 'Segment'],
                  values='Profit', color='Profit',
                  color_continuous_scale='RdBu',
                  title='🌞 Profit by Region and Segment (Sunburst)')
fig.update_layout(dict(width=700, height=700))
fig.show()


# Exploring the direct relationship between Discount and Profit using an interactive scatter plot.

In [None]:
# Scatter of Profit against Discount, one semi-transparent indigo point per row.
fig = px.scatter(df, x='Discount', y='Profit',
                 opacity=0.7,
                 color_discrete_sequence=['indigo'],
                 title='🔎 Scatter Plot — Discount vs Profit')

# Fixed marker size, then overall figure size and theme.
fig.update_traces(marker_size=8)
fig.update_layout(template='plotly_white', width=700, height=500)
fig.show()

# Exploring the link between Sales & Profit

In [None]:
# Scatter of Profit against Sales, one semi-transparent dark-cyan point per row.
fig = px.scatter(df, x='Sales', y='Profit',
                 opacity=0.6,
                 color_discrete_sequence=['darkcyan'],
                 title='📈 Scatter Plot — Sales vs Profit')

# Fixed marker size, then overall figure size and theme.
fig.update_traces(marker_size=7)
fig.update_layout(template='plotly_white', width=700, height=500)
fig.show()

# Visualizing State-wise Sales on Map

In [None]:
import us

# Resolve each DISTINCT state name to its postal abbreviation once, then map the
# column through that table. This avoids calling us.states.lookup twice for
# every row. Names the `us` package cannot resolve are kept unchanged.
state_code_by_name = {}
for state_name in df['State'].unique():
    matched_state = us.states.lookup(state_name)
    state_code_by_name[state_name] = matched_state.abbr if matched_state else state_name

df['State Code'] = df['State'].map(state_code_by_name)


In [None]:
# Collapse the data to one row per state before plotting. The map shows one
# value per location, so feeding it the row-level frame would colour (and
# hover-label) each state with an individual transaction instead of the
# state's total Sales and Profit.
state_totals = df.groupby(['State', 'State Code'], as_index=False)[['Sales', 'Profit']].sum()

fig = px.choropleth(state_totals,
                    locations='State Code',      # two-letter codes, as required by 'USA-states'
                    locationmode='USA-states',
                    color='Sales',
                    scope="usa",
                    hover_name='State',
                    hover_data=['Sales', 'Profit'],
                    title='🗺️ State-wise Sales and Profit Distribution (Superstore)',)

# Last expression of the cell: returns the figure so the notebook renders it.
fig.update_layout(
    width=800,
    height=500)


In [None]:
# Bubble chart of Profit against Sales: bubble area follows Discount (capped
# at size_max=40), colour follows Category, hover title is the Sub-Category.
fig = px.scatter(df, x="Sales", y="Profit",
                 size="Discount", size_max=40,
                 color="Category",
                 hover_name="Sub-Category",
                 title="💬 Sales vs Profit Bubble Chart (Size by Discount)")
fig.update_layout(dict(width=800, height=500))
fig.show()

# Heatmap showing how sales are distributed across categories and regions.

In [None]:
# Group and Unstack the Data
# Total Sales for every (Region, Category) pair, then pivot Category out of the
# index so the result is a Region-by-Category table.
sales_by_region_category = df.groupby(['Region', 'Category'])['Sales'].sum()
heatmap_data = sales_by_region_category.unstack(level='Category')
heatmap_data

In [None]:
# Heatmap of the Region-by-Category sales table, with each cell's value printed on it.
fig = px.imshow(heatmap_data,
                text_auto=True,
                aspect="auto",
                color_continuous_scale='YlGnBu',
                labels={"color": "Sales"},
                title='Total Sales by Region and Category')

fig.update_layout(font_size=14, width=700, height=500)

fig.show()

# This heatmap shows the correlation between key numerical variables in the Superstore dataset: Sales, Profit, Quantity, and Discount. 

In [None]:
# Pairwise correlation of the four numeric measures (pandas' default method),
# drawn as an annotated heatmap.
numeric_measures = ['Sales', 'Profit', 'Quantity', 'Discount']
corr_matrix = df[numeric_measures].corr()

fig = px.imshow(
    corr_matrix,
    text_auto=True,
    aspect="auto",
    labels={"color": "Correlation"},
    title='Correlation Heatmap - Superstore',
)
fig.update_layout(font_size=14, width=600, height=500)

fig.show()
