In [72]:
# IMPORT STATEMENTS

#Import Python packages
import pandas as pd
import json
import matplotlib.pyplot as plt
import json

import ast

# Import Snowflake modules
from snowflake.snowpark import Session

In [73]:
# Get account credentials from a json file
with open("data_scientist_auth.json") as f:
    data = json.load(f)
    username = data["username"]
    password = data["password"]
    account = data["account"]

# Specify connection parameters
connection_parameters = {
    "account": account,
    "user": username,
    "password": password,
    "role": "TASTY_BI",
    "warehouse": "TASTY_BI_WH",
    "database": "frostbyte_tasty_bytes",
    "schema": "analytics",
}

# Create Snowpark session
session = Session.builder.configs(connection_parameters).create()

In [74]:
# retrieve order details usa table from snowflake
order_data_from_snowflake = session.table("frostbyte_tasty_bytes.analytics.ORDER_DETAILS_USA_MATCHED")

In [75]:
# convert order_data_from_snowflake to pandas dataframe
order_df = order_data_from_snowflake.to_pandas()

In [76]:
# retrieve menu data from snowflake
menu_data_from_snowflake = session.table("frostbyte_tasty_bytes.raw_pos.MENU")

In [77]:
# convert menu_data_from_snowflake to pandas dataframe
menu_df = menu_data_from_snowflake.to_pandas()

In [78]:
# preview order table
order_df.head()

Unnamed: 0,ORDER_ID,CUSTOMER_ID,TRUCK_ID,LOCATION_ID,SHIFT_ID,SHIFT_START_TIME,SHIFT_END_TIME,ORDER_CHANNEL,ORDER_TS,SERVED_TS,...,E_MAIL,PHONE_NUMBER,ORDER_DETAIL_ID,MENU_ITEM_ID,DISCOUNT_ID,LINE_NUMBER,QUANTITY,UNIT_PRICE,PRICE,ORDER_ITEM_DISCOUNT_AMOUNT
0,447758081,101191,61,15475.0,200524445,15:30:00,22:30:00,,2022-09-22 18:13:43,,...,Zain.Sullivan@hotmail.com,659-899-8290,872457141,19,,0,1,3.0,3.0,
1,447758081,101191,61,15475.0,200524445,15:30:00,22:30:00,,2022-09-22 18:13:43,,...,Zain.Sullivan@hotmail.com,659-899-8290,872457142,11,,1,1,6.0,6.0,
2,447758081,101191,61,15475.0,200524445,15:30:00,22:30:00,,2022-09-22 18:13:43,,...,Zain.Sullivan@hotmail.com,659-899-8290,872457143,15,,2,1,3.0,3.0,
3,447759739,195383,62,2588.0,200524447,15:30:00,22:30:00,,2022-09-22 19:46:15,,...,Aryana.Dennis@hotmail.com,437-446-0786,872462265,24,,0,1,2.0,2.0,
4,447759739,195383,62,2588.0,200524447,15:30:00,22:30:00,,2022-09-22 19:46:15,,...,Aryana.Dennis@hotmail.com,437-446-0786,872462266,27,,1,1,6.0,6.0,


In [79]:
# preview menu table
menu_df.head()

Unnamed: 0,MENU_ID,MENU_TYPE_ID,MENU_TYPE,TRUCK_BRAND_NAME,MENU_ITEM_ID,MENU_ITEM_NAME,ITEM_CATEGORY,ITEM_SUBCATEGORY,COST_OF_GOODS_USD,SALE_PRICE_USD,MENU_ITEM_HEALTH_METRICS_OBJ
0,10001,1,Ice Cream,Freezing Point,10,Lemonade,Beverage,Cold Option,0.65,3.5,"{\n ""menu_item_health_metrics"": [\n {\n ..."
1,10002,1,Ice Cream,Freezing Point,11,Sugar Cone,Dessert,Cold Option,2.5,6.0,"{\n ""menu_item_health_metrics"": [\n {\n ..."
2,10003,1,Ice Cream,Freezing Point,12,Waffle Cone,Dessert,Cold Option,2.5,6.0,"{\n ""menu_item_health_metrics"": [\n {\n ..."
3,10004,1,Ice Cream,Freezing Point,13,Two Scoop Bowl,Dessert,Cold Option,3.0,7.0,"{\n ""menu_item_health_metrics"": [\n {\n ..."
4,10005,1,Ice Cream,Freezing Point,14,Bottled Water,Beverage,Cold Option,0.5,2.0,"{\n ""menu_item_health_metrics"": [\n {\n ..."


In [80]:
# Convert the string JSON data to a nested dictionary
menu_df['MENU_ITEM_HEALTH_METRICS_OBJ'] = menu_df['MENU_ITEM_HEALTH_METRICS_OBJ'].apply(ast.literal_eval)

# Use json_normalize to flatten the nested JSON data
menu_item_metrics = pd.json_normalize(menu_df['MENU_ITEM_HEALTH_METRICS_OBJ'], record_path='menu_item_health_metrics')

# Rename the columns
menu_item_metrics = menu_item_metrics.rename(columns={
    'is_dairy_free_flag': 'DAIRY_FREE',
    'is_gluten_free_flag': 'GLUTEN_FREE',
    'is_healthy_flag': 'HEALTHY',
    'is_nut_free_flag': 'NUT_FREE'
})

# Replace 'Y' with 'Yes' and 'N' with 'No' in the DataFrame
menu_item_metrics = menu_item_metrics.replace({'Y': 1, 'N': 0})

# Concatenate the flattened DataFrame with the original DataFrame
menu_df = pd.concat([menu_df, menu_item_metrics], axis=1)

# Drop the original 'MENU_ITEM_HEALTH_METRICS_OBJ' and 'ingredients' column 
menu_df = menu_df.drop(columns=['MENU_ITEM_HEALTH_METRICS_OBJ', 'ingredients'])

In [81]:
menu_df.head()

Unnamed: 0,MENU_ID,MENU_TYPE_ID,MENU_TYPE,TRUCK_BRAND_NAME,MENU_ITEM_ID,MENU_ITEM_NAME,ITEM_CATEGORY,ITEM_SUBCATEGORY,COST_OF_GOODS_USD,SALE_PRICE_USD,DAIRY_FREE,GLUTEN_FREE,HEALTHY,NUT_FREE
0,10001,1,Ice Cream,Freezing Point,10,Lemonade,Beverage,Cold Option,0.65,3.5,1,1,0,1
1,10002,1,Ice Cream,Freezing Point,11,Sugar Cone,Dessert,Cold Option,2.5,6.0,1,1,0,1
2,10003,1,Ice Cream,Freezing Point,12,Waffle Cone,Dessert,Cold Option,2.5,6.0,0,0,0,1
3,10004,1,Ice Cream,Freezing Point,13,Two Scoop Bowl,Dessert,Cold Option,3.0,7.0,0,1,0,1
4,10005,1,Ice Cream,Freezing Point,14,Bottled Water,Beverage,Cold Option,0.5,2.0,1,1,1,1


In [82]:
# Get the total quantity sold for each menu item 
## group by 'MENU_ITEM_ID' and calculate the total quantity sold
total_qty_sold_per_item = order_df.groupby('MENU_ITEM_ID')['QUANTITY'].sum().reset_index()

## rename the 'QUANTITY' column to 'TOTAL_QTY_SOLD'
total_qty_sold_per_item = total_qty_sold_per_item.rename(columns={'QUANTITY': 'TOTAL_QTY_SOLD'})

## merge total_qty_sold_per_item with final_product_df
menu_df = pd.merge(menu_df, total_qty_sold_per_item, on='MENU_ITEM_ID')

In [83]:
menu_df.head()

Unnamed: 0,MENU_ID,MENU_TYPE_ID,MENU_TYPE,TRUCK_BRAND_NAME,MENU_ITEM_ID,MENU_ITEM_NAME,ITEM_CATEGORY,ITEM_SUBCATEGORY,COST_OF_GOODS_USD,SALE_PRICE_USD,DAIRY_FREE,GLUTEN_FREE,HEALTHY,NUT_FREE,TOTAL_QTY_SOLD
0,10001,1,Ice Cream,Freezing Point,10,Lemonade,Beverage,Cold Option,0.65,3.5,1,1,0,1,607
1,10002,1,Ice Cream,Freezing Point,11,Sugar Cone,Dessert,Cold Option,2.5,6.0,1,1,0,1,1815
2,10003,1,Ice Cream,Freezing Point,12,Waffle Cone,Dessert,Cold Option,2.5,6.0,0,0,0,1,1786
3,10004,1,Ice Cream,Freezing Point,13,Two Scoop Bowl,Dessert,Cold Option,3.0,7.0,0,1,0,1,1682
4,10005,1,Ice Cream,Freezing Point,14,Bottled Water,Beverage,Cold Option,0.5,2.0,1,1,1,1,629


In [84]:
final_df = menu_df.drop(["MENU_ID", "MENU_TYPE_ID", "MENU_ITEM_ID", "MENU_ITEM_NAME", "COST_OF_GOODS_USD"]
                        , axis=1)

In [85]:
final_df.head()

Unnamed: 0,MENU_TYPE,TRUCK_BRAND_NAME,ITEM_CATEGORY,ITEM_SUBCATEGORY,SALE_PRICE_USD,DAIRY_FREE,GLUTEN_FREE,HEALTHY,NUT_FREE,TOTAL_QTY_SOLD
0,Ice Cream,Freezing Point,Beverage,Cold Option,3.5,1,1,0,1,607
1,Ice Cream,Freezing Point,Dessert,Cold Option,6.0,1,1,0,1,1815
2,Ice Cream,Freezing Point,Dessert,Cold Option,6.0,0,0,0,1,1786
3,Ice Cream,Freezing Point,Dessert,Cold Option,7.0,0,1,0,1,1682
4,Ice Cream,Freezing Point,Beverage,Cold Option,2.0,1,1,1,1,629
