In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [3]:
# Read the sales CSV; the path is relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='resources/sales_data_final.csv')

# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,Transaction ID,Date,Month,Product Category,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
0,10001,2024-01-01,1,Electronics,iPhone 14 Pro,2,999.99,1999.98,North America,Credit Card
1,10002,2024-01-02,1,Home Appliances,Dyson V11 Vacuum,1,499.99,499.99,Europe,PayPal
2,10003,2024-01-03,1,Clothing,Levi's 501 Jeans,3,69.99,209.97,Asia,Debit Card
3,10004,2024-01-04,1,Books,The Da Vinci Code,4,15.99,63.96,North America,Credit Card
4,10005,2024-01-05,1,Beauty Products,Neutrogena Skincare Set,1,89.99,89.99,Europe,PayPal


## Full Dataset Analysis
Summary statistics and charts computed over every transaction in the dataset:
- Average unit price by product category
- Total units sold by product category
- Total revenue by product category
- Revenue distribution by product category
- Distribution of payment methods used
- Total revenue by payment method

In [4]:
# Mean unit price within each product category, ordered from the highest
# average price down to the lowest.
avg_unit_price_category = (
    df.groupby('Product Category')['Unit Price']
    .mean()
    .sort_values(ascending=False)
)

# Bare expression so the notebook renders the resulting Series.
avg_unit_price_category

Product Category
Electronics        691.5915
Home Appliances    320.1855
Sports             261.2840
Clothing            67.5365
Beauty Products     61.6230
Books               16.1530
Name: Unit Price, dtype: float64

In [5]:
# Bar chart of the mean unit price per category over the whole dataset,
# with bars coloured by category.
fig = px.bar(
    data_frame=avg_unit_price_category,
    x=avg_unit_price_category.index,
    y=avg_unit_price_category.values,
    color=avg_unit_price_category.index,
    title='Mean Unit Price by Item Category',
    labels={'x': 'Category', 'y': 'Mean Unit Price'},
    text_auto=True,
)
# Show each bar's y value outside the bar, formatted to two decimals;
# cliponaxis=False stops those labels from being clipped at the axes.
fig.update_traces(
    texttemplate='%{y:.2f}',
    textposition="outside",
    textangle=0,
    textfont_size=11,
    cliponaxis=False,
)
fig.show()

In [6]:
# Sum of units sold within each product category, largest total first.
total_units_sold_category = (
    df.groupby('Product Category')['Units Sold']
    .sum()
    .sort_values(ascending=False)
)

# Bare expression so the notebook renders the resulting Series.
total_units_sold_category

Product Category
Clothing           145
Books              114
Sports              88
Electronics         66
Home Appliances     59
Beauty Products     46
Name: Units Sold, dtype: int64

In [7]:
# Bar chart of total units sold per category, with bars coloured by category.
fig = px.bar(
    data_frame=total_units_sold_category,
    x=total_units_sold_category.index,
    y=total_units_sold_category.values,
    color=total_units_sold_category.index,
    title='Total Units Sold by Item Category',
    labels={'x': 'Category', 'y': 'Units Sold'},
    text_auto=True,
)
# Bar labels come from text_auto; place them outside the bars and do not
# clip them at the axes.
fig.update_traces(
    textposition="outside",
    textangle=0,
    textfont_size=11,
    cliponaxis=False,
)
fig.show()

In [8]:
# Sum of revenue within each product category, largest total first.
total_revenue_category = (
    df.groupby('Product Category')['Total Revenue']
    .sum()
    .sort_values(ascending=False)
)

# Bare expression so the notebook renders the resulting Series.
total_revenue_category

Product Category
Electronics        34982.41
Home Appliances    18646.16
Sports             14326.52
Clothing            8128.93
Beauty Products     2621.90
Books               1861.93
Name: Total Revenue, dtype: float64

In [9]:
# Bar chart of total revenue per category, with bars coloured by category.
fig = px.bar(
    data_frame=total_revenue_category,
    x=total_revenue_category.index,
    y=total_revenue_category.values,
    color=total_revenue_category.index,
    title='Total Revenue by Item Category',
    labels={'x': 'Category', 'y': 'Total Revenue'},
    text_auto=True,
)
# Show each bar's y value outside the bar, formatted to two decimals;
# cliponaxis=False stops those labels from being clipped at the axes.
fig.update_traces(
    texttemplate='%{y:.2f}',
    textposition="outside",
    textangle=0,
    textfont_size=11,
    cliponaxis=False,
)
fig.show()

In [10]:
# Pie chart of each category's share of total revenue.
fig = px.pie(
    data_frame=total_revenue_category,
    names=total_revenue_category.index,
    values=total_revenue_category.values,
    title='Revenue Distribution by Item Category',
)
fig.show()

In [26]:
# Number of transactions per payment method, most used first.
# count() is taken over 'Product Category', so it tallies the rows in each
# group whose 'Product Category' value is not null.
payment_methods = (
    df.groupby('Payment Method')['Product Category']
    .count()
    .sort_values(ascending=False)
)

# Bare expression so the notebook renders the resulting Series.
payment_methods

Payment Method
Credit Card    120
PayPal          80
Debit Card      40
Name: Product Category, dtype: int64

In [27]:
# Pie chart of how the transaction count splits across payment methods.
fig = px.pie(
    data_frame=payment_methods,
    names=payment_methods.index,
    values=payment_methods.values,
    title='Distribution of Payment Types Used',
)
fig.show()

In [29]:
# Sum of revenue collected through each payment method, largest total first.
payment_methods_revenue = (
    df.groupby('Payment Method')['Total Revenue']
    .sum()
    .sort_values(ascending=False)
)

# Bare expression so the notebook renders the resulting Series.
payment_methods_revenue

Payment Method
Credit Card    51170.86
PayPal         21268.06
Debit Card      8128.93
Name: Total Revenue, dtype: float64

In [30]:
fig = px.bar(payment_methods_revenue, x=payment_methods_revenue.index, y=payment_methods_revenue.values,
             labels={
                 'x': 'Payment Method', 
                 'y': 'Total Revenue'
                 },
             title='Total Revenue by Payment Method',
             color=payment_methods_revenue.index,
             text_auto=True
             )
fig.update_traces(textfont_size=11, textangle=0, textposition="outside", cliponaxis=False, texttemplate='%{y:.2f}')
fig.show()

## Analysis by Month
- Number of transactions per month and product category

In [65]:
# Group the transactions by month alone. The key is kept as a one-element
# list so the group keys have the same form as before.
monthly_group = df.groupby(by=['Month'])

# Group the transactions by month and, within each month, by product category.
monthly_cat_group = df.groupby(by=['Month', 'Product Category'])

# Non-null value count of every remaining column for each
# (month, category) pair.
monthly_cat_group.count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Transaction ID,Date,Product Name,Units Sold,Unit Price,Total Revenue,Region,Payment Method
Month,Product Category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,Beauty Products,5,5,5,5,5,5,5,5
1,Books,5,5,5,5,5,5,5,5
1,Clothing,5,5,5,5,5,5,5,5
1,Electronics,6,6,6,6,6,6,6,6
1,Home Appliances,5,5,5,5,5,5,5,5
1,Sports,5,5,5,5,5,5,5,5
2,Beauty Products,5,5,5,5,5,5,5,5
2,Books,5,5,5,5,5,5,5,5
2,Clothing,5,5,5,5,5,5,5,5
2,Electronics,4,4,4,4,4,4,4,4
