In [42]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.colors as colors
pio.templates.default = "plotly_white"

# Load the Superstore sample; the CSV is decoded with the Latin-1 codec.
data = pd.read_csv("Sample - Superstore.csv", encoding="latin-1")

# Preview the first rows, then the describe() summary, one after the other
# (a newline separator yields the same output as two separate prints).
print(data.head(), data.describe(), sep="\n")

"""The dataset has an order date column. We can use it to derive new columns (order month, order year,
   and order day of week), which are very valuable for analysing sales and profit over time periods. So let's
   add these columns:"""

# Convert the date columns from strings to datetimes so the `.dt` accessor
# can be used below. The file stores dates as month/day/year (e.g.
# "10/18/2015"), so the format is pinned explicitly: parsing no longer depends
# on pandas' format inference, and a malformed value raises instead of being
# silently guessed at.
data['Order Date'] = pd.to_datetime(data['Order Date'], format='%m/%d/%Y')
data['Ship Date'] = pd.to_datetime(data['Ship Date'], format='%m/%d/%Y')

# Derived calendar fields used for the time-based aggregations.
data['Order Month'] = data['Order Date'].dt.month  # 1-12
data['Order Year'] = data['Order Date'].dt.year
data['Order Day of Week'] = data['Order Date'].dt.dayofweek  # Monday=0 ... Sunday=6


# Monthly sales: total 'Sales' per 'Order Month' value. Rows are grouped on
# the month number only, so the same month of different years is summed together.
sales_by_month = data.groupby('Order Month', as_index=False)['Sales'].sum()

fig = px.line(
    data_frame=sales_by_month,
    x='Order Month',
    y='Sales',
    title='Monthly Sales Analysis',
)
fig.show()


# Sales by category: total 'Sales' per 'Category', drawn as a donut chart.
sales_by_category = (
    data.groupby('Category')['Sales']
    .sum()
    .reset_index()
)

fig = px.pie(
    data_frame=sales_by_category,
    names='Category',
    values='Sales',
    hole=0.5,  # hollow centre turns the pie into a donut
    color_discrete_sequence=px.colors.qualitative.Pastel,
)

# Put the percentage and the category label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(title_text='Sales Analysis by Category', title_font={'size': 24})
fig.show()


# Sales by sub-category: total 'Sales' per 'Sub-Category', one bar each.
sales_by_subcategory = data.groupby('Sub-Category', as_index=False)['Sales'].sum()

fig = px.bar(
    data_frame=sales_by_subcategory,
    x='Sub-Category',
    y='Sales',
    title='Sales Analysis by Sub-Category',
)
fig.show()


# Monthly profit: total 'Profit' per 'Order Month' value (grouped on the month
# number only, so all years contribute to the same point).
profit_by_month = (
    data.groupby('Order Month')['Profit']
    .sum()
    .reset_index()
)

fig = px.line(
    data_frame=profit_by_month,
    x='Order Month',
    y='Profit',
    title='Monthly Profit Analysis',
)
fig.show()


# Profit by category: total 'Profit' per 'Category', drawn as a donut chart.
# NOTE(review): a pie/donut is only meaningful while every category's total
# profit is non-negative - confirm against the data.
profit_by_category = data.groupby('Category', as_index=False)['Profit'].sum()

fig = px.pie(
    data_frame=profit_by_category,
    names='Category',
    values='Profit',
    hole=0.5,  # hollow centre turns the pie into a donut
    color_discrete_sequence=px.colors.qualitative.Pastel,
)

# Put the percentage and the category label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(title_text='Profit Analysis by Category', title_font={'size': 24})
fig.show()


# Profit by sub-category: total 'Profit' per 'Sub-Category', one bar each.
profit_by_subcategory = (
    data.groupby('Sub-Category')['Profit']
    .sum()
    .reset_index()
)

fig = px.bar(
    data_frame=profit_by_subcategory,
    x='Sub-Category',
    y='Profit',
    title='Profit Analysis by Sub-Category',
)
fig.show()


# Sales and profit by customer segment: total 'Sales' and 'Profit' per
# 'Segment', shown as two bar series on the same axes.
sales_profit_by_segment = (
    data.groupby('Segment')
    .agg({'Sales': 'sum', 'Profit': 'sum'})
    .reset_index()
)

color_palette = colors.qualitative.Pastel

# Build both bar traces up front; the first two palette colours distinguish
# the Sales series from the Profit series.
fig = go.Figure(
    data=[
        go.Bar(
            name='Sales',
            x=sales_profit_by_segment['Segment'],
            y=sales_profit_by_segment['Sales'],
            marker_color=color_palette[0],
        ),
        go.Bar(
            name='Profit',
            x=sales_profit_by_segment['Segment'],
            y=sales_profit_by_segment['Profit'],
            marker_color=color_palette[1],
        ),
    ]
)

fig.update_layout(
    title='Sales and Profit Analysis by Customer Segment',
    xaxis_title='Customer Segment',
    yaxis_title='Amount',
)

fig.show()


   Row ID        Order ID  Order Date   Ship Date       Ship Mode Customer ID   
0       1  CA-2016-152156   11/8/2016  11/11/2016    Second Class    CG-12520  \
1       2  CA-2016-152156   11/8/2016  11/11/2016    Second Class    CG-12520   
2       3  CA-2016-138688   6/12/2016   6/16/2016    Second Class    DV-13045   
3       4  US-2015-108966  10/11/2015  10/18/2015  Standard Class    SO-20335   
4       5  US-2015-108966  10/11/2015  10/18/2015  Standard Class    SO-20335   

     Customer Name    Segment        Country             City  ...   
0      Claire Gute   Consumer  United States        Henderson  ...  \
1      Claire Gute   Consumer  United States        Henderson  ...   
2  Darrin Van Huff  Corporate  United States      Los Angeles  ...   
3   Sean O'Donnell   Consumer  United States  Fort Lauderdale  ...   
4   Sean O'Donnell   Consumer  United States  Fort Lauderdale  ...   

  Postal Code  Region       Product ID         Category Sub-Category   
0       42420   Sout