# Importing libraries

In [2]:
import pathlib
import pandas as pd
import calendar
import numpy as np

from dash.dependencies import Input, Output
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
from dash import Dash, html, dcc

import os

In [3]:
dir =  os.getcwd()
FILE = dir + "/SQL/SQL_output.csv" 
RAIN = dir + "/Resources/2015_RainFall.csv"

In [4]:
rain = pd.read_csv(RAIN)
df = pd.read_csv(FILE)
df if df.isna().sum(axis=1).any() else print("No missing values") 

No missing values


In [5]:
# Formatting the data
df.rename(columns={'pizza_type_id': 'pizza_flavor'}, inplace=True) 
df['date'] = pd.to_datetime(df['date'])
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day
df['day_of_week'] = df['date'].dt.day_name()
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Sunday']
df['hour'] = pd.to_datetime(df['time']).dt.hour

# Creating order id
df['order_id'] = df['month'].apply(lambda x: str(x).zfill(2)) + df['day'].apply(lambda x: str(x).zfill(2)) + df['time']

# Create 'multiple_orders' column, 1 if the order_id is duplicated, 0 if not
df['multiple_orders'] = df.duplicated('order_id').astype(int)

# Fixing typing errors in 'ingredients' column
df['ingredients'] = df['ingredients'].str.replace(', ', ',') # one space
df['ingredients'] = df['ingredients'].str.replace(',  ', ',') # two spaces after comma

# Perform one-hot encoding on the 'ingredients' column
ingredients_dummies = df['ingredients'].str.get_dummies(',')

# Add the new columns to the original DataFrame
df = pd.concat([df, ingredients_dummies], axis=1)
df = df.drop('ingredients', axis=1) # Drop the OG 'ingredients' column
df 

  df['hour'] = pd.to_datetime(df['time']).dt.hour


Unnamed: 0,quantity,date,time,size,pizza_flavor,price,category,month,day,day_of_week,...,Sliced Ham,Smoked Gouda Cheese,Soppressata Salami,Spinach,Sun-dried Tomatoes,Thai Sweet Chilli Sauce,Thyme,Tomatoes,Zucchini,�Nduja Salami
0,1,2015-01-03,14:22:10,M,hawaiian,13.25,Classic,1,3,Saturday,...,1,0,0,0,0,0,0,0,0,0
1,1,2015-01-03,14:32:51,XL,the_greek,25.50,Classic,1,3,Saturday,...,0,0,0,0,0,0,0,1,0,0
2,1,2015-01-03,14:40:42,S,mediterraneo,12.00,Veggie,1,3,Saturday,...,0,0,0,1,1,0,0,0,0,0
3,1,2015-01-03,14:48:45,M,spinach_fet,16.00,Veggie,1,3,Saturday,...,0,0,0,1,0,0,0,0,0,0
4,1,2015-01-03,14:49:58,M,pepperoni,12.50,Classic,1,3,Saturday,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48615,1,2015-12-31,14:40:24,L,southw_ckn,20.75,Chicken,12,31,Thursday,...,0,0,0,0,0,0,0,1,0,0
48616,1,2015-12-31,14:40:24,M,southw_ckn,16.75,Chicken,12,31,Thursday,...,0,0,0,0,0,0,0,1,0,0
48617,1,2015-12-31,14:40:24,S,spicy_ital,12.50,Supreme,12,31,Thursday,...,0,0,0,0,0,0,0,1,0,0
48618,1,2015-12-31,14:43:46,L,napolitana,20.50,Classic,12,31,Thursday,...,0,0,0,0,0,0,0,1,0,0


In [6]:
pd.set_option('display.max_columns', None)

hourly_sum_quantity = df.groupby(['day_of_week', 'hour'])['quantity'].sum()
print('total quantity sold at this hour of this day of the week')
print(hourly_sum_quantity)


total quantity sold at this hour of this day of the week
day_of_week  hour
Friday       11       388
             12      1101
             13      1040
             14       525
             15       441
                     ... 
Wednesday    18       756
             19       516
             20       398
             21       271
             22       146
Name: quantity, Length: 95, dtype: int64


    # 4 employees if more than (1040 in a year) 20 pizzas sold in an hour
    # average price of pizza is $ 20. Suppose margin is 50% = $10. 
    # profit is 20 * 10 = 200 an hour. Wage for 4 employees is 60 an hour. 
    # wage over proft ratio = 0.30

    # 3 employees if more than (780 in a year) 15 pizzas sold in an hour.
    # average price of pizza is $ 20. Suppose margin is 50% = $10. 
    # profit is 15 * 10 = 150 an hour. Wage for 3 employees is 45 an hour. 
    # wage over proft ratio = 0.30

    # 2 employees if more than (520 in a year) 10 pizzas sold in an hour 
    # average price of pizza is $ 20. Suppose margin is 50% = $10.
    # profit is 10 * 10 = 100 an hour. Wage for 2 employees is 30 an hour.
    # wage over proft ratio = 0.30

    # 1 employee if more than (195 in a year) 3.75 pizzas sold in an hour
    # average price of pizza is $ 20. Suppose margin is 50% = $10.
    # profit is 3.75 * 10 = 37.5 an hour. Wage for 1 employee is 15 an hour.
    # wage over proft ratio = 0.4

    # 0 employees if less than (195 in a year) 3.75 pizzas sold in an hour (don't need to be open, price of wage > profit)

In [7]:
hourly_sum_quantity = hourly_sum_quantity.apply(lambda x: 4 if x > 1040 else 3 if x > 780 else 2 if x > 520 else 1 if x > 195 else 0)
hourly_sum_quantity_df = hourly_sum_quantity.reset_index(name='desired_employee_count')
hourly_sum_quantity_df['default_employee_count'] = hourly_sum_quantity_df['hour'].apply(lambda x: 3 if 10 < x < 20 else 2)

In [8]:

hourly_sum_quantity_df['wage'] = 15
hourly_sum_quantity_df['1_wk_difference_in_labor'] = hourly_sum_quantity_df['default_employee_count'] - hourly_sum_quantity_df['desired_employee_count']
hourly_sum_quantity_df['est_1_yr_difference_in_labor'] = hourly_sum_quantity_df['1_wk_difference_in_labor'] * 52
hourly_sum_quantity_df['1_wk_labor_cost_diff'] = hourly_sum_quantity_df['1_wk_difference_in_labor'] * hourly_sum_quantity_df['wage']
hourly_sum_quantity_df['est_1_yr_labor_cost_diff'] =hourly_sum_quantity_df['1_wk_labor_cost_diff'] * 52
total = hourly_sum_quantity_df.sum() 
print(total)

day_of_week                     FridayFridayFridayFridayFridayFridayFridayFrid...
hour                                                                         1560
desired_employee_count                                                        151
default_employee_count                                                        253
wage                                                                         1425
1_wk_difference_in_labor                                                      102
est_1_yr_difference_in_labor                                                 5304
1_wk_labor_cost_diff                                                         1530
est_1_yr_labor_cost_diff                                                    79560
dtype: object


In [9]:
# CSS styles
tabs_styles = {
    'height': '44px',
    'alignItems': 'center',
    'backgroundColor': '#1E1E1E', # dark grey
    'padding': '6px',
}

tab_style = {
    'backgroundColor': '#121212',  # darker grey tab
    'color': '#FFFFFF',  # white text always
    'padding': '10px',  
    'border': 'none'  # Remove borders
}

tab_selected_style = {
    'backgroundColor': '#2D2D2D',  # Selected tab, light grey
    'color': '#7FDBFF',  # changes to blue when selected
    'padding': '10px',  
    'borderRadius': '5px'  # Rounded corners for selected tab
}

app.layout = html.Div(style={
    'backgroundColor': '#303030',  # Dark background always
    'color': '#FFFFFF',  # White text always
    'fontFamily': '"Roboto", sans-serif',  # custom font
    'margin': '-8px',  
    'minHeight': '100vh',  # Full view height
    'padding': '35px'  # Padding everywhere
}, children=[
    html.H1("Python for Business Analytics - Jean Batista", style={'color': '#FFFFFF','textAlign': 'center'}), # center text color white 
    dcc.Tabs(id="tabs", value='tab-1', children=[
        dcc.Tab(label='Peak Hours', value='tab-1', style=tab_style, selected_style=tab_selected_style),
        dcc.Tab(label='Weather Correlation', value='tab-2', style=tab_style, selected_style=tab_selected_style),
        dcc.Tab(label='Variety', value='tab-3', style=tab_style, selected_style=tab_selected_style),
        dcc.Tab(label='Sizes', value='tab-4', style=tab_style, selected_style=tab_selected_style),
    ], style=tabs_styles),
    html.Div(id='content')
])

NameError: name 'app' is not defined

In [None]:
# Define the callback to update the content based on the selected tab
@app.callback(
    Output('content', 'children'),
    Input('tabs', 'value')
)

def render_content(tab):
    if tab == 'tab-1':
        return html.Div([
            html.H2('Orders each Hour', style={'color': '#FFFFFF'}),
            html.Div(
                'Minimize labor cost by optimizing staffing hours based on peak time intervals for orders. This change reduces staffed hours by 59.6%, cutting expenses by approximately $79,560 per year. (Assuming a wage of $15 per employee, and baseline default of 2-3 employees per hour depending time of day(red dotted line))',
                style={
                    'fontSize': '17px',  # slighter smaller than top text size
                    'marginBottom': '20px',  # Space below the sub-text
                    'marginTop': '5px'  # Space above the sub-text
                }
            ),
            dcc.Slider(
                id='day-slider',
                min=0,
                max=6,
                marks={i: day for i, day in enumerate(day_order)},
                value=0,
                step=None,
            ),
            html.Div([
                dcc.Graph(id='heatmap-graph'),
                dcc.Graph(id='employee-area-chart')  # New Graph for Employee Count
            ]) 
        ])
    elif tab == 'tab-2':
        return html.Div([
            html.H2('Rainfall and Total Quantity Sold'),
            dcc.Graph(
                id='subplot-graph',
                figure=update_subplot_graph()
            )
        ])
    elif tab == 'tab-3':
        return html.Div([
            html.H2('Pizza Types'),
            html.Label("Select a month:", style={'font-weight': 'bold', 'display': 'block', 'margin-bottom': '5px'}),
            dcc.Slider(
                id='month-slider',
                min=df['month'].min(),
                max=df['month'].max(),
                value=df['month'].min(),
                marks={int(i): {'label': calendar.month_name[i], 'style': {'transform': 'rotate(-45deg)', 'white-space': 'nowrap'}}
       for i in df['month'].unique()},
                step=None,
            ),
            html.Div(
                id='selected-pizza-stats',
                # This will display the selected pizza statistics, add some space below this div
                style={'margin-bottom': '20px'}  # Adjust the space as needed
            ),
            dcc.Graph(
                id='scatterplot',
                # Call the function with the default slider value
                figure=update_scatterplot(df['month'].min()),
                style={'margin-top': '20px'}  # Add space above the graph
            )
        ])
    elif tab == 'tab-4':
        return html.Div([
            html.H2('Pizza Sizes'),
            dcc.Graph(
                id='bar-chart',
                figure=update_bar_chart()
            ),
            dcc.Graph(
                id='pie-chart',
                figure=update_pie_chart()
            )
        ])
    

NameError: name 'app' is not defined

In [None]:
# Creating function that applies same style to all graphs   
def style_graph(graph): 
    graph.update_layout(
        paper_bgcolor='#303030',  # Dark background for the graph area
        plot_bgcolor='#303030',  # Dark background inside the graph
        font={'color': '#7FDBFF'},  # baby blue
        xaxis=dict(
            showgrid=False,  # Remove x-axis grid lines
            color='#7FDBFF',  # Baby blue always
            showline=True, 
            linewidth=2,  
            linecolor='#7FDBFF',  
        ),
        yaxis=dict(
            showgrid=False,  
            color='#7FDBFF',  
            showline=True,  
            linewidth=2,  
            linecolor='#7FDBFF',  
        ),
        yaxis2=dict( # for the second y axis in Weather Correlation
            showgrid=False,  
            color='#7FDBFF',  
            showline=True,  
            linewidth=2,  
            linecolor='#7FDBFF',  
            overlaying='y',  
            side='right',  
        ),
        legend=dict(
            bgcolor='rgba(0,0,0,0)',  # Transparent legend background
            bordercolor='#7FDBFF',  # Baby blue color text
        )
    )
    return graph


## Page 1 maximizing based on peak hours

In [None]:
# Slider tab 1 callback
@app.callback(
    Output('heatmap-graph', 'figure'),
    Input('day-slider', 'value')
)

def update_graph(day_index):
    day = day_order[day_index]
    return update_heatmap(day)

def update_heatmap(day):
    filtered_df = df[df['day_of_week'] == day]
    pizza_type_counts = filtered_df['pizza_flavor'].value_counts()
    popular_pizza_types = pizza_type_counts.nlargest(15).index.tolist() # for visibility. 
    filtered_df = filtered_df[filtered_df['pizza_flavor'].isin(popular_pizza_types)] # 32 pizza flavors were too much
    filtered_df['hour'] = pd.to_numeric(filtered_df['hour'])  
    filtered_df = filtered_df.sort_values('hour')  
    filtered_df['quantity'] = filtered_df['quantity'] / 52 # divide by 52 weeks to get average per week, rather than total per year
    heatmap_fig = px.density_heatmap(filtered_df, x='hour', y='pizza_flavor', z='quantity', color_continuous_scale='RdYlGn')
    heatmap_fig.update_layout(xaxis={'type': 'category'})  

    return style_graph(heatmap_fig)


@app.callback(
    Output('employee-area-chart', 'figure'),
    [Input('day-slider', 'value')]
)
def update_employee_area_chart(day_index):
    day = day_order[day_index]
    return generate_employee_area_chart(day)

def generate_employee_area_chart(day):
    daily_data = df[df['day_of_week'] == day]

    # Group by hour and calculate sum of quantity, 
    # then apply a function that determines the number of employees needed
    # based on the sum of quantity
    hourly_sum = daily_data.groupby('hour')['quantity'].sum()

    # 4 employees if more than (1040 in a year) 20 pizzas sold in an hour
    # average price of pizza is $ 20. Suppose margin is 50% = $10. 
    # profit is 20 * 10 = 200 an hour. Wage for 4 employees is 60 an hour. 
    # wage over proft ratio = 0.30

    # 3 employees if more than (780 in a year) 15 pizzas sold in an hour.
    # average price of pizza is $ 20. Suppose margin is 50% = $10. 
    # profit is 15 * 10 = 150 an hour. Wage for 3 employees is 45 an hour. 
    # wage over proft ratio = 0.30

    # 2 employees if more than (520 in a year) 10 pizzas sold in an hour 
    # average price of pizza is $ 20. Suppose margin is 50% = $10.
    # profit is 10 * 10 = 100 an hour. Wage for 2 employees is 30 an hour.
    # wage over proft ratio = 0.30

    # 1 employee if more than (195 in a year) 3.75 pizzas sold in an hour
    # average price of pizza is $ 20. Suppose margin is 50% = $10.
    # profit is 3.75 * 10 = 37.5 an hour. Wage for 1 employee is 15 an hour.
    # wage over proft ratio = 0.4

    # 0 employees if less than (195 in a year) 3.75 pizzas sold in an hour (don't need to be open, price of wage > profit)
    hourly_employee_count = hourly_sum.apply(lambda x: 4 if x > 1040 else 3 if x > 780 else 2 if x > 520 else 1 if x > 195 else 0)

    # Calculate default_employee_count dynamically
    default_employee_count = hourly_sum.index.to_series().apply(lambda x: 3 if 10 < x < 20 else 2)

    # Create area chart
    area_chart_fig = go.Figure()
    area_chart_fig.add_trace(go.Scatter(
        x=hourly_employee_count.index,
        y=hourly_employee_count,
        fill='tozeroy',
        mode='none',  # Remove line markers
        name='Optimized Employee Count'
    ))

    # Add a dynamic line for the calculated default employee count
    area_chart_fig.add_trace(go.Scatter(
        x=default_employee_count.index,
        y=default_employee_count,
        mode='lines',
        name='Dynamic Default Employee Count',
        line=dict(color='red', dash='dash')
    ))

    # Update layout
    area_chart_fig.update_layout(
        title=f'Employee Count by Hour for {day} | Optimized employee count is calculated to maintain a wage / profit ratio of .40 for 1 employee, and .30 for 2,3, or 4 employees.',
        margin=dict(l=140, r=40, t=40, b=40),
        xaxis_title='Hour',
        yaxis_title='Employee Count'
    )

    return style_graph(area_chart_fig)

NameError: name 'app' is not defined

## Page 2 rain correlation

In [None]:
def update_subplot_graph():
    results = df.groupby('month')['order_id'].count()
    months = list(range(1, 13))
    days_in_month = [calendar.monthrange(2015, month)[1] for month in months]
    adjusted_quantity = results / days_in_month 
    rainfall = rain.groupby('month').sum()

    # Create the subplot fig
    fig = sp.make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(go.Scatter(x=months, y=adjusted_quantity, name='Total Quantity Sold', line=dict(color='green')), secondary_y=False)
    fig.add_trace(go.Scatter(x=months, y=rainfall['rainfall'], name='Rainfall', line=dict(color='#3076ff')), secondary_y=True)
    fig.update_layout(
        xaxis=dict(title='Month', dtick=1),
        yaxis=dict(
            title='Total Quantity Sold',
            titlefont=dict(color='#7FDBFF'), # baby blue
            tickfont=dict(color='#7FDBFF')
        ),
        yaxis2=dict(
            title='Rainfall in inches',
            titlefont=dict(color='#7FDBFF'),# baby blue 
            tickfont=dict(color='#7FDBFF'), 
            anchor='x',
            overlaying='y',
            side='right'
        ),
        legend=dict(x=0, y=1.2, orientation='h')
    )

    return style_graph(fig)

## Page 3 Category and pizza flavors visualized

In [None]:
# Slider tab 3 callback
@app.callback(
    Output('scatterplot', 'figure'),
    [Input('month-slider', 'value')]
)
def update_scatterplot(selected_month):
    # Filter DataFrame based on the selected month
    monthly_df = df[df['month'] == selected_month]

    # Aggregate the data to find total quantity and category for each pizza flavor
    flavor_popularity = monthly_df.groupby(['pizza_flavor', 'category'])['quantity'].sum().reset_index()

    # Create scatter plot, color by 'category'
    scatterplot_fig = px.scatter(flavor_popularity, x='pizza_flavor', y='quantity',
        color='category',  # Set the color of the circles based on the 'category'
        title='Popularity of Pizza Flavors by Category',
        labels={'quantity': 'Total Quantity Sold'},
        size='quantity',  # Optional: Use the quantity as the size of the scatter plot markers
        hover_name='pizza_flavor')  # Optional: Show the pizza flavor name on hover

    # Update layout
    scatterplot_fig.update_layout(xaxis={'categoryorder': 'total descending'})

    return style_graph(scatterplot_fig)

NameError: name 'app' is not defined

# Page 4 Sizes 

In [None]:
# I have the pie chart to evaluate if XL and XXL are viable options
def update_pie_chart():
    sizes = df.groupby('size')['quantity'].sum().reset_index()
    size_order = ['S', 'M', 'L', 'XL', 'XXL']
    sizes['size'] = sizes['size'].astype('category')
    sizes['size'] = sizes['size'].cat.set_categories(size_order)
    sizes.sort_values("size", inplace=True)

    pie_color_pallete = ['rgb(210,242,212)', 'rgb(123,227,130)','rgb(38,204,0)','rgb(34,182,0)','rgb(0, 156, 26)']
    pie_chart_fig = go.Figure(data=go.Pie(labels=sizes['size'], values=sizes['quantity'], marker=dict(colors=pie_color_pallete), hole=0.9, sort=False))
    pie_chart_fig.update_layout(title='Pizza Sizes Distribution', showlegend=True)

    return style_graph(pie_chart_fig)

# I've removed the greek, brie carre, big meats, and five cheese because they would represent outlier
# Because of their size options
def update_bar_chart():
    filtered_df_pie = df[df['pizza_flavor'] == 'the_greek']
    filtered_df_bar = df.copy()

    excluded_sizes = ['XL', 'XXL']
    filtered_df_bar = filtered_df_bar[~filtered_df_bar['size'].isin(excluded_sizes)]

    excluded_pizza_types = ['big_meat', 'brie_carre', 'five_cheese']
    filtered_df_bar = filtered_df_bar[~filtered_df_bar['pizza_flavor'].isin(excluded_pizza_types)]

    size_quantity_bar = filtered_df_bar.groupby('size')['quantity'].sum().reset_index().sort_values('quantity')

    bar_chart_fig = go.Figure(data=go.Bar(x=size_quantity_bar['size'], y=size_quantity_bar['quantity']))

    bar_chart_fig.update_layout(title='Orders by Size',
                                xaxis_title='Size',
                                yaxis_title='Total Quantity Sold')

    return style_graph(bar_chart_fig)

In [29]:

sizes = df.groupby('size')['quantity'].sum().reset_index()
size_order = ['S', 'M', 'L', 'XL', 'XXL']
sizes['size'] = sizes['size'].astype('category')
sizes['size'] = sizes['size'].cat.set_categories(size_order)
sizes.sort_values("size", inplace=True)

pie_color_pallete = ['rgb(210,242,212)', 'rgb(123,227,130)','rgb(38,204,0)','rgb(34,182,0)','rgb(0, 156, 26)']
pie_chart_fig = go.Figure(data=go.Pie(labels=sizes['size'], values=sizes['quantity'], marker=dict(colors=pie_color_pallete), hole=0.9, sort=False))
pie_chart_fig.update_layout(title='Pizza Sizes Distribution', showlegend=True)

pie_chart_fig





In [48]:
unique_sizes = df.groupby('pizza_flavor')['size'].nunique()

# Pizzas that come in only one unique size
unique_size_pizzas = unique_sizes[unique_sizes == 1]
unique_size_pizzas_df = df[df['pizza_flavor'].isin(unique_size_pizzas.index)]
unique_size_pizzas_df

Unnamed: 0,quantity,date,time,size,pizza_flavor,price,category,month,day,day_of_week,hour,order_id,multiple_orders,Alfredo Sauce,Anchovies,Artichoke,Artichokes,Arugula,Asiago Cheese,Bacon,Barbecue Sauce,Barbecued Chicken,Beef Chuck Roast,Blue Cheese,Brie Carre Cheese,Calabrese Salami,Capocollo,Caramelized Onions,Chicken,Chipotle Sauce,Chorizo Sausage,Cilantro,Coarse Sicilian Salami,Corn,Eggplant,Feta Cheese,Fontina Cheese,Friggitello Peppers,Garlic,Genoa Salami,Goat Cheese,Gorgonzola Piccante Cheese,Gouda Cheese,Green Olives,Green Peppers,Italian Sausage,Jalapeno Peppers,Kalamata Olives,Luganega Sausage,Mozzarella Cheese,Mushrooms,Onions,Oregano,Pancetta,Parmigiano Reggiano Cheese,Pears,Peperoncini verdi,Pepperoni,Pesto Sauce,Pineapple,Plum Tomatoes,Prosciutto,Prosciutto di San Daniele,Provolone Cheese,Red Onions,Red Peppers,Ricotta Cheese,Romano Cheese,Sliced Ham,Smoked Gouda Cheese,Soppressata Salami,Spinach,Sun-dried Tomatoes,Thai Sweet Chilli Sauce,Thyme,Tomatoes,Zucchini,�Nduja Salami
31,1,2015-01-03,16:51:22,S,big_meat,12.00,Classic,1,3,Saturday,16,010316:51:22,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
42,1,2015-01-03,17:02:27,S,big_meat,12.00,Classic,1,3,Saturday,17,010317:02:27,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
55,1,2015-01-03,17:37:49,S,big_meat,12.00,Classic,1,3,Saturday,17,010317:37:49,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
71,1,2015-01-03,18:50:10,S,brie_carre,23.65,Supreme,1,3,Saturday,18,010318:50:10,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
83,1,2015-01-03,19:37:12,L,five_cheese,18.50,Veggie,1,3,Saturday,19,010319:37:12,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48587,1,2015-12-31,12:09:59,S,brie_carre,23.65,Supreme,12,31,Thursday,12,123112:09:59,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
48594,1,2015-12-31,12:44:37,L,five_cheese,18.50,Veggie,12,31,Thursday,12,123112:44:37,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0
48596,1,2015-12-31,12:48:36,S,big_meat,12.00,Classic,12,31,Thursday,12,123112:48:36,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
48599,1,2015-12-31,13:04:18,L,five_cheese,18.50,Veggie,12,31,Thursday,13,123113:04:18,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0


In [53]:

flavor_popularity = unique_size_pizzas_df.groupby(['pizza_flavor', 'size'])['quantity'].sum().reset_index()
flavor_popularity
# Create scatter plot, color by 'category'
scatterplot_fig = px.scatter(flavor_popularity, x='pizza_flavor', y='quantity',
    color='size',  # Set the color of the circles based on the 'category'
    title='Unique Pizza Flavors',
    labels={'quantity': 'Sales', 'pizza_flavor': 'Pizza Flavor'},
    size='quantity',  # Optional: Use the quantity as the size of the scatter plot markers
    hover_name='pizza_flavor')  # Optional: Show the pizza flavor name on hover

# Update layout
scatterplot_fig.update_layout(xaxis={'categoryorder': 'total descending'})
scatterplot_fig

In [64]:
unique_size_pizzas = unique_sizes[unique_sizes == 1]
unique_size_pizzas_df = df[df['pizza_flavor'].isin(unique_size_pizzas.index)]
median_orders = df.groupby('pizza_flavor')['quantity'].sum().median()
diff_from_median = abs(df.groupby('pizza_flavor')['quantity'].sum() - median_orders)
balanced_flavors = diff_from_median.nsmallest(5).index
balanced_flavors_df = df[df['pizza_flavor'].isin(balanced_flavors)]
combined_df = pd.concat([unique_size_pizzas_df, balanced_flavors_df])
combined_df['unique'] = np.where(combined_df['pizza_flavor'].isin(unique_size_pizzas.index), 'Only comes in one size', 'Small, Medium, or Large')
combined_flavor_popularity = combined_df.groupby(['pizza_flavor', 'unique'])['quantity'].sum().reset_index()
scatterplot_fig = go.Figure()
for color, group in combined_flavor_popularity.groupby('unique'):
    scatterplot_fig.add_trace(go.Scatter(
        x=group['pizza_flavor'], 
        y=group['quantity'],
        mode='markers',
        name=color,
        hovertext=group['pizza_flavor']  # Show the pizza flavor name on hover
    ))

scatterplot_fig.update_layout(
    title='Unique Pizza Flavors',
    xaxis={'categoryorder': 'total descending', 'title': 'Pizza Flavor'},
    yaxis={'title': 'Sales'}
)

scatterplot_fig

In [77]:

greek_df = df[df['pizza_flavor'] == 'the_greek']
greek_group = greek_df.groupby('size')['quantity'].sum().reset_index()
size_order = ['S', 'M', 'L', 'XL', 'XXL']
greek_group['size'] = greek_group['size'].astype('category')
greek_group['size'] = greek_group['size'].cat.set_categories(size_order)
greek_group.sort_values("size", inplace=True)
pie_color_pallete = ['rgb(210,242,212)', 'rgb(130,210,130)','rgb(73,170,0)','rgb(42,126,0)','rgb(0, 80, 26)']

greek_chart_fig = go.Figure(data=go.Pie(labels=greek_group['size'], values=greek_group['quantity'],marker=dict(colors=pie_color_pallete), sort= False, hole=0.9))

greek_chart_fig.update_layout(title='Pizza Sizes Distribution for "The Greek"',
                                showlegend=True)


greek_chart_fig

## Other stuff

In [None]:
if __name__ == '__main__':
    app.run_server()

NameError: name 'app' is not defined