In [11]:
# Import the pandas library, used to load our CSV dataset
import pandas as pd

In [12]:
# Load the Sample Superstore CSV into a DataFrame (the file is read with latin1 encoding)
superstore = pd.read_csv(
    "Sample - Superstore.csv",
    encoding="latin1",
)

# Preview the first five rows of the loaded dataset
superstore.head(5)

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
0,1,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136
1,2,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,42420,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582
2,3,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,90036,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714
3,4,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031
4,5,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,33311,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164


## Introduction to Plotly Express

In [10]:
# Install the plotly library
pip install plotly

SyntaxError: invalid syntax (3721129173.py, line 2)

In [13]:
# Import the plotly express library for use in our project
import plotly.express as px

# Import the plotly graph objects library for use in our project
import plotly.graph_objects as go

### 1. Product Analysis Dashboard

In [14]:
# Distinct product categories present in the dataset
superstore["Category"].unique()

array(['Furniture', 'Office Supplies', 'Technology'], dtype=object)

In [15]:
# Number of rows recorded for each product category
superstore["Category"].value_counts()

Category
Office Supplies    6026
Furniture          2121
Technology         1847
Name: count, dtype: int64

In [16]:
# Dimensions of the dataset as a (rows, columns) tuple
superstore.shape

(9994, 21)

In [17]:
# Total quantity sold per category, as a Series indexed by category name
categoryquantity = (
    superstore
    .groupby("Category")["Quantity"]
    .sum()
)
categoryquantity

Category
Furniture           8028
Office Supplies    22906
Technology          6939
Name: Quantity, dtype: int64

In [18]:
# .keys() on the grouped Series gives its index, i.e. the category labels
categoryquantity.keys()

Index(['Furniture', 'Office Supplies', 'Technology'], dtype='object', name='Category')

In [19]:
# The category labels, read directly from the Series index (used later as the chart's names)
categoryquantity.index

Index(['Furniture', 'Office Supplies', 'Technology'], dtype='object', name='Category')

In [20]:
# The summed quantities as an array, in the same order as the index (used later as the chart's values)
categoryquantity.values

array([ 8028, 22906,  6939])

#### Pie Chart
- **Documentation (px.pie() function parameters)** - https://plotly.com/python-api-reference/generated/plotly.express.pie.html

- **Documentation for using and styling pie charts** - https://plotly.com/python/pie-charts/

- **Documentation (.add_annotation() function parameters)** - https://plotly.com/python-api-reference/generated/plotly.graph_objects.html#plotly.graph_objects.Figure

- **Documentation (.update_layout() function parameters)** - https://plotly.com/python/reference/layout/

In [22]:
# Build a doughnut chart (a pie with a hole) of the quantity sold per category
categoryquantitydistribution = px.pie(
    names=categoryquantity.index,
    values=categoryquantity.values,
    hole=0.7,
    title="Categories Sold",
    color_discrete_sequence=px.colors.qualitative.Dark24_r,
)

# Transparent paper background, with the title and legend text in white
categoryquantitydistribution.update_layout(
    paper_bgcolor="rgba(0, 0, 0, 0)",
    title=dict(font=dict(color="white", size=50, family="Old Standard TT")),
    legend_font_color="white",
)

# Overall quantity sold across all categories, formatted with thousands separators
totalquantitysold = f"{categoryquantity.sum():,}"

# Two stacked annotations inside the doughnut's hole: a caption, then the total beneath it
categoryquantitydistribution.add_annotation(
    text="Total Sold",
    y=0.55,
    showarrow=False,
    font=dict(color="white", size=14),
)
categoryquantitydistribution.add_annotation(
    text=totalquantitysold,
    y=0.45,
    showarrow=False,
    font=dict(color="white", size=14),
)

# Display the finished doughnut chart
categoryquantitydistribution

In [None]:
# List the column names available in the dataset
superstore.columns

Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
       'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State',
       'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category',
       'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit'],
      dtype='object')

#### Sunburst
- **Documentation (px.sunburst() function parameters)** - https://plotly.com/python-api-reference/generated/plotly.express.sunburst.html

- **Documentation for using and styling sunburst charts** - https://plotly.com/python/sunburst-charts/

- **Documentation (.update_traces() function parameters)** - https://plotly.com/python/reference/sunburst/

- **Documentation (.update_layout() function parameters)** - https://plotly.com/python/reference/layout/

In [25]:
# Aggregate the quantity sold for every (Category, Sub-Category) pair into a flat table
category_subcategory_quantity = (
    superstore
    .groupby(["Category", "Sub-Category"])["Quantity"]
    .sum()
    .reset_index()
)

# Sunburst with categories as the inner ring and their sub-categories as the outer ring
category_subcategory_quantity_distribution = px.sunburst(
    category_subcategory_quantity,
    path=["Category", "Sub-Category"],
    values="Quantity",
    title="Category / Sub-Category Distribution",
)

# Transparent paper background, with the title and legend text in white
category_subcategory_quantity_distribution.update_layout(
    paper_bgcolor="rgba(0, 0, 0, 0)",
    title=dict(font=dict(color="white", size=14)),
    legend_font_color="white",
)

# Colour the sectors using the reversed Dark24 qualitative palette
category_subcategory_quantity_distribution.update_traces(
    marker=dict(colors=px.colors.qualitative.Dark24_r),
)

# Display the finished sunburst chart
category_subcategory_quantity_distribution

In [None]:
# Total quantity sold across the entire dataset
quantitysold = superstore.Quantity.sum()

# The three cities with the highest total quantity sold
top3cities = superstore.groupby('City')['Quantity'].sum().nlargest(3)

# Combined quantity sold in those three cities
top3citiesgrouped = top3cities.sum()

# Each top city's share of the total quantity sold in the entire dataset, in percent
percentagedistribution = (top3cities / quantitysold) * 100

# Legend labels of the form "<city> : <share>%".
# The format spec is ".2f" with no leading space: a leading space is the "sign" flag
# and would insert an extra blank before every positive number.
distributionlabels = [
    f"{city} : {percentage:.2f}%"
    for city, percentage in zip(top3cities.index, percentagedistribution)
]

# Creating the doughnut chart showing the distribution based on the top 3 cities.
# Slices are sized by each city's quantity; the percentages in the labels are
# relative to the whole dataset, not to the top 3 only.
cityquantitydistribution = px.pie(
    names = distributionlabels,
    values = top3cities.values,
    hole = 0.7,
    color_discrete_sequence = px.colors.qualitative.Dark24_r,
    title = "Top 3 Cities Sold To",
)

# Changing the graph, title and legend colors to white
cityquantitydistribution.update_layout(
    paper_bgcolor = "rgba(0, 0, 0, 0)",
    title = dict(font = dict(color = "white", size = 14)),
    legend_font_color = "white",
)

# Adding three stacked lines of text in the middle of the doughnut chart:
# a caption above centre, a sub-caption at the default position, and the total below
cityquantitydistribution.add_annotation(text = "Top 3 Cities", showarrow = False, font = dict(size = 14, color = "white"), y = 0.6)
cityquantitydistribution.add_annotation(text = "Total Sold", showarrow = False, font = dict(size = 14, color = "white"))
cityquantitydistribution.add_annotation(text = '{:,}'.format(top3citiesgrouped), showarrow = False, font = dict(size = 14, color = "white"), y = 0.4)

# Display the finished doughnut chart
cityquantitydistribution

#### Maps
- **Documentation (px.scatter_geo() function parameters)** - https://plotly.com/python-api-reference/generated/plotly.express.scatter_geo.html

- **Documentation for using and styling scatter geo charts** - https://plotly.com/python/scatter-plots-on-maps/

- **Documentation (.update_layout() function parameters)** - https://plotly.com/python/reference/layout/

- **Documentation (.update_geos() function parameters)** - https://plotly.com/python/reference/layout/geo/

- **Documentation (.update_coloraxes() function parameters)** - https://plotly.com/python/reference/layout/coloraxis/

In [None]:
# Total quantity sold per country, as a two-column frame (Country, Quantity)
countryquantity = superstore.groupby('Country')['Quantity'].sum().reset_index()

# Bubble map: one marker per country, sized and coloured by the quantity sold.
# countryquantity is already aggregated per country, so its columns are referenced
# by name instead of re-grouping the frame for the size and color arguments.
countryquantitydistribution = px.scatter_geo(
    countryquantity,
    locations = "Country",
    locationmode = "country names",
    projection = "natural earth",
    size = "Quantity",
    color = "Quantity",
    color_continuous_scale = "magma_r",
)

# Transparent paper and map backgrounds, grey land, visible country borders and white title text
countryquantitydistribution.update_layout(
    paper_bgcolor = "rgba(0, 0, 0, 0)",
    title = dict(font = dict(color = "white", size = 14)),
    geo = dict(bgcolor = "rgba(0, 0, 0, 0)", landcolor = "slategrey", showcountries = True),
)

# Draw the frame around the map
countryquantitydistribution.update_geos(showframe = True)

# Label the colour bar and render its title and tick text in white
countryquantitydistribution.update_coloraxes(colorbar_tickfont_color = "white", colorbar_title = "Quantity Sold", colorbar_title_font_color = "White")

# Display the finished map
countryquantitydistribution