# Data analysis on Pizza Sales

In [None]:
# I imported my necessary libraries and also the dataset
import pandas as pd
import plotly.express as pe
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.graph_objects as pg
pd.set_option('display.max_columns', None)
data = pd.read_csv('/content/pizza_sales.csv')
data.head(4)

Unnamed: 0,pizza_id,order_id,pizza_name_id,quantity,order_date,order_time,unit_price,total_price,pizza_size,pizza_category,pizza_ingredients,pizza_name
0,1.0,1.0,hawaiian_m,1.0,1/1/2015,11:38:36,13.25,13.25,M,Classic,"Sliced Ham, Pineapple, Mozzarella Cheese",The Hawaiian Pizza
1,2.0,2.0,classic_dlx_m,1.0,1/1/2015,11:57:40,16.0,16.0,M,Classic,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",The Classic Deluxe Pizza
2,3.0,2.0,five_cheese_l,1.0,1/1/2015,11:57:40,18.5,18.5,L,Veggie,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",The Five Cheese Pizza
3,4.0,2.0,ital_supr_l,1.0,1/1/2015,11:57:40,20.75,20.75,L,Supreme,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",The Italian Supreme Pizza


In [None]:
## I seperated the categorical data from the numerical data
categorical = data.select_dtypes(include = ['object', 'category'])
numerical = data.select_dtypes(include = 'number')
print(f'Categorical data')
display(categorical.head(4))
print(f'\n\nNumerical data')
display(numerical.head(2))

Categorical data


Unnamed: 0,pizza_name_id,order_date,order_time,pizza_size,pizza_category,pizza_ingredients,pizza_name
0,hawaiian_m,1/1/2015,11:38:36,M,Classic,"Sliced Ham, Pineapple, Mozzarella Cheese",The Hawaiian Pizza
1,classic_dlx_m,1/1/2015,11:57:40,M,Classic,"Pepperoni, Mushrooms, Red Onions, Red Peppers,...",The Classic Deluxe Pizza
2,five_cheese_l,1/1/2015,11:57:40,L,Veggie,"Mozzarella Cheese, Provolone Cheese, Smoked Go...",The Five Cheese Pizza
3,ital_supr_l,1/1/2015,11:57:40,L,Supreme,"Calabrese Salami, Capocollo, Tomatoes, Red Oni...",The Italian Supreme Pizza




Numerical data


Unnamed: 0,pizza_id,order_id,quantity,unit_price,total_price
0,1.0,1.0,1.0,13.25,13.25
1,2.0,2.0,1.0,16.0,16.0


# Data Exploration and Visualization
##### I queried the dataset and I did some visualization with it

In [None]:
# Using a Pie Chart, Show the top 3 best selling Pizza Category (Metric is Total Price)
sel = data[['pizza_category', 'total_price']]
sel = sel.groupby('pizza_category')[['total_price']].mean().sort_values(ascending = False, by = 'total_price')
print('Total price of the pizza_category')
print(sel)

fig = pe.pie(data_frame= sel, names= sel.index, values = 'total_price', color= sel.index , hole = 0.2)
fig.show()

Total price of the pizza_category
                total_price
pizza_category             
Chicken           18.115534
Supreme           17.678271
Veggie            16.917674
Classic           15.093840


In [None]:
# Of the 3 best selling Pizza size, Show the distribution of Pizza Sizes
sel = data[['pizza_size', 'total_price']]
sel = sel.groupby('pizza_size')[['total_price']].mean().sort_values(ascending = False, by = 'total_price')
print('Top 3 best selling pizza size')
print(sel)

fig = pe.bar(data_frame= sel, x = sel.index, y = 'total_price', color = sel['total_price'] , width = 800, height = 500, title = 'Top 3 best selling pizza size')
fig.show()

Top 3 best selling pizza size
            total_price
pizza_size             
XXL           35.950000
XL            25.875000
L             20.259025
M             16.209441
S             12.596484


In [None]:
# Show the pizza size that sells the fastest
sel = data[['pizza_size', 'quantity']]
sel = sel.groupby('pizza_size')[['quantity']].sum().sort_values(ascending = False, by = 'quantity')
print('Fast selling pizza size')
print(sel)

fig = pe.bar(data_frame= sel, x = sel.index, y = 'quantity', color = sel['quantity'] , width = 800, height = 450, title = 'Fast selling pizza size')
fig.show()

Fast selling pizza size
            quantity
pizza_size          
L            18956.0
M            15635.0
S            14403.0
XL             552.0
XXL             28.0


In [None]:
##What are the top 10 most expensive pizza
sel = data[['pizza_name', 'total_price']]
sel = sel.groupby('pizza_name')[['total_price']].mean().sort_values(ascending = False, by = 'total_price')
sel.head(10)


Unnamed: 0_level_0,total_price
pizza_name,Unnamed: 1_level_1
The Brie Carre Pizza,24.142708
The Greek Pizza,20.237624
The Five Cheese Pizza,19.180648
The Thai Chicken Pizza,18.762095
The Spicy Italian Pizza,18.458532
The Southwest Chicken Pizza,18.411538
The Mexicana Pizza,18.393372
The Italian Supreme Pizza,18.105327
The Barbecue Chicken Pizza,18.030354
The California Chicken Pizza,17.988488


In [None]:
# Choose 3 combination of ingredient. What ingredient do customers prefer to eat the most.
sel = data[['pizza_ingredients', 'quantity']]
sel = sel.groupby('pizza_ingredients')[['quantity']].sum().sort_values(ascending = False, by = 'quantity')
sel.head(3)

Unnamed: 0_level_0,quantity
pizza_ingredients,Unnamed: 1_level_1
"Pepperoni, Mushrooms, Red Onions, Red Peppers, Bacon",2453.0
"Barbecued Chicken, Red Peppers, Green Peppers, Tomatoes, Red Onions, Barbecue Sauce",2432.0
"Sliced Ham, Pineapple, Mozzarella Cheese",2422.0


In [None]:
# What pizza size makes the most sales
sel = data[['pizza_size', 'total_price']]
sel = sel.groupby('pizza_size')[['total_price']].sum().sort_values(ascending = False, by = 'total_price')
sel.head(3)

Unnamed: 0_level_0,total_price
pizza_size,Unnamed: 1_level_1
L,375318.7
M,249382.25
S,178076.5


In [None]:
# Of the 3 best selling Pizza size, Show the distribution of Pizza Sizes
sel = data[['pizza_size', 'total_price']]
sel = sel.loc[((data['pizza_size'] == 'L') | (data['pizza_size'] == 'M') | (data['pizza_size'] == 'S'))]
sel = sel.groupby('pizza_size')[['total_price']].mean().sort_values(ascending = False, by = 'total_price')
print('Total price of the pizza_size')
print(sel)

fig = pe.pie(data_frame= sel, names= sel.index, values = 'total_price', color= sel.index , hole = 0.2)
fig.show()

Total price of the pizza_size
            total_price
pizza_size             
L             20.259025
M             16.209441
S             12.596484
