In [53]:
# Import Snowflake modules
from snowflake.snowpark import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from snowflake.snowpark import Window

import pandas
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json

In [68]:
# Read the account credentials that are kept outside the notebook in a JSON file
with open("data_scientist_auth.json") as f:
    data = json.load(f)

# Pick out the three fields the connection needs
username = data["username"]
password = data["password"]
account = data["account"]

# Connection settings: credentials from the file plus the fixed
# role / warehouse / database / schema used by this notebook
connection_parameters = dict(
    account=account,
    user=username,
    password=password,
    role="TASTY_BI",
    warehouse="TASTY_BI_WH",
    database="frostbyte_tasty_bytes",
    schema="analytics",
)

# Open the Snowpark session with those settings
session = Session.builder.configs(connection_parameters).create()

In [70]:
# Survey sentiment responses, addressed by fully-qualified name (database.schema.table)
SENTIMENT_DATA = session.table("frostbyte_tasty_bytes_setup_s.raw_survey.sentiment_survey_ds")

# Truck master data from the raw point-of-sale schema, also fully qualified
TRUCK_DATA = session.table("frostbyte_tasty_bytes_setup_s.raw_pos.truck")

In [71]:
# Print a preview of the survey sentiment rows to inspect the available columns
SENTIMENT_DATA.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SURVEY_ID"       |"SURVEY_TS"  |"TRUCK_ID"  |"COMMENTS"                                    |"NPS_SCORE"  |"NPS_CATEGORY"  |"OVERALL_SENTIMENT"  |"ASPECT_SENTIMENT"                           |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|60509-SL          |2023-01-25   |291         |                                              |60.0         |Passive         |0.0                  |[                                            |
|                  |             |            |                                              |             |                |                     |  {                                          |
|                  |          

In [72]:
# Print a preview of the truck rows to inspect the available columns
TRUCK_DATA.show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"TRUCK_ID"  |"MENU_TYPE_ID"  |"PRIMARY_CITY"  |"REGION"    |"ISO_REGION"  |"COUNTRY"      |"ISO_COUNTRY_CODE"  |"FRANCHISE_FLAG"  |"YEAR"  |"MAKE"        |"MODEL"           |"EV_FLAG"  |"FRANCHISE_ID"  |"TRUCK_OPENING_DATE"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|3           |3               |San Mateo       |California  |CA            |United States  |US                  |1                 |2004    |Freightliner  |MT45 Utilimaster  |0          |2               |2021-10-01            |
|4           |4               |San Mateo       |California  |CA            |United State

In [73]:
# Attach truck attributes to every survey response (inner join on the shared
# TRUCK_ID key). Joining by column *name* instead of an equality expression
# keeps a single TRUCK_ID column in the result; the expression form carries
# both copies through, and Snowpark renames such overlapping columns with
# generated prefixes, which makes TRUCK_ID awkward to reference afterwards.
SENTIMENT = TRUCK_DATA.join(SENTIMENT_DATA, on="TRUCK_ID", how="inner")

In [74]:
# Drop incomplete rows from the joined survey/truck data.
# NOTE(review): dropna() is called without a subset, so (with Snowpark's default
# how="any" — confirm) a null in ANY joined column removes the row, including
# columns the NPS averages below never read. Consider
# dropna(subset=["SURVEY_TS", "NPS_SCORE"]) if only those matter.
SENTIMENT2 = SENTIMENT.dropna()

In [75]:
# Derive calendar parts of the survey timestamp. YEAR_MONTH is the year and the
# month concatenated as-is (no separator, no zero padding).
AVG_NPS_SCORE = (
    SENTIMENT2
    .with_column("SURVEY_TS_YEAR", F.year("SURVEY_TS"))
    .with_column("SURVEY_TS_MONTH", F.month("SURVEY_TS"))
    .with_column("YEAR_MONTH", F.concat(F.col("SURVEY_TS_YEAR"), F.col("SURVEY_TS_MONTH")))
)

# Average NPS score at three granularities: year, calendar month, year-month
AVG_NPS_SCORE_YEAR = AVG_NPS_SCORE.group_by("SURVEY_TS_YEAR").agg(F.avg("NPS_SCORE"))
AVG_NPS_SCORE_MONTH = AVG_NPS_SCORE.group_by("SURVEY_TS_MONTH").agg(F.avg("NPS_SCORE"))
AVG_NPS_SCORE_YM = AVG_NPS_SCORE.group_by("YEAR_MONTH").agg(F.avg("NPS_SCORE"))

# Print each summary, coarsest grouping first
AVG_NPS_SCORE_YEAR.show()
AVG_NPS_SCORE_MONTH.show()
AVG_NPS_SCORE_YM.show()

----------------------------------------
|"SURVEY_TS_YEAR"  |"AVG(NPS_SCORE)"   |
----------------------------------------
|2023              |70.0               |
|2019              |89.78290366350068  |
|2020              |82.65104808877929  |
|2021              |70.78996630475477  |
|2022              |73.91523935002196  |
----------------------------------------

-----------------------------------------
|"SURVEY_TS_MONTH"  |"AVG(NPS_SCORE)"   |
-----------------------------------------
|1                  |77.3211009174312   |
|4                  |76.62244897959184  |
|5                  |73.8928210313448   |
|8                  |74.87179487179488  |
|6                  |75.06467661691542  |
|2                  |79.09722222222223  |
|11                 |78.18380743982495  |
|9                  |74.67253176930596  |
|7                  |73.42359767891683  |
|10                 |78.1470588235294   |
-----------------------------------------

------------------------------------
|"YE

In [76]:
# Load the NPS_COMPARE table. The name is unqualified, so it presumably resolves
# against the database/schema given in connection_parameters — confirm.
NPS_COMPARE = session.table(name = "NPS_COMPARE")

In [88]:
# Print a preview of the NPS_COMPARE rows to inspect the available columns
NPS_COMPARE.show()

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"CUSTOMER_ID"  |"CITY"  |"DATE"      |"ORDER_TOTAL"  |"TOTAL_QUANTITY"  |"DAYS_TO_NEXT_ORDER"  |"DAYS_SINCE_PRIOR_ORDER"  |"AVG_DAYS_BETWEEN_PURCHASES"  |"MAX_DAYS_WITHOUT_ITEM"  |"MIN_DAYS_WITHOUT_ITEM"  |"FREQUENCY"  |"MONETARYVALUE"  |"AGE"  |"NextPurchaseDayRange"  |"YEAR"  |"MONTH"  |"YEAR_MONTH"  |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|81445          |Sydney  |2020-04-20  |104.0          |12                |29   

In [78]:
# Add calendar columns derived from the order DATE. YEAR_MONTH is built the same
# way as for the survey data (year and month concatenated as-is) so the two
# sets of summaries use matching keys.
NPS_COMPARE = (
    NPS_COMPARE
    .with_column("YEAR", F.year("DATE"))
    .with_column("MONTH", F.month("DATE"))
    .with_column("YEAR_MONTH", F.concat(F.col("YEAR"), F.col("MONTH")))
)
NPS_COMPARE.show()

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"CUSTOMER_ID"  |"CITY"  |"DATE"      |"ORDER_TOTAL"  |"TOTAL_QUANTITY"  |"DAYS_TO_NEXT_ORDER"  |"DAYS_SINCE_PRIOR_ORDER"  |"AVG_DAYS_BETWEEN_PURCHASES"  |"MAX_DAYS_WITHOUT_ITEM"  |"MIN_DAYS_WITHOUT_ITEM"  |"FREQUENCY"  |"MONETARYVALUE"  |"AGE"  |"NextPurchaseDayRange"  |"YEAR"  |"MONTH"  |"YEAR_MONTH"  |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|81445          |Sydney  |2020-04-20  |104.0          |12                |29   

In [79]:
# Average DAYS_TO_NEXT_ORDER at the same three granularities used for the NPS score
AVG_NPDR_YEAR = NPS_COMPARE.group_by("YEAR").agg(F.avg('"DAYS_TO_NEXT_ORDER"'))
AVG_NPDR_MONTH = NPS_COMPARE.group_by("MONTH").agg(F.avg('"DAYS_TO_NEXT_ORDER"'))
AVG_NPDR_YM = NPS_COMPARE.group_by("YEAR_MONTH").agg(F.avg('"DAYS_TO_NEXT_ORDER"'))

# Print each purchase-interval summary directly followed by the NPS summary of
# the same granularity so the two can be compared side by side

# -- by year
AVG_NPDR_YEAR.show()
AVG_NPS_SCORE_YEAR.show()

# -- by calendar month
AVG_NPDR_MONTH.show()
AVG_NPS_SCORE_MONTH.show()

# -- by year-month
AVG_NPDR_YM.show()
AVG_NPS_SCORE_YM.show()

--------------------------------------
|"YEAR"  |"AVG(DAYS_TO_NEXT_ORDER)"  |
--------------------------------------
|2020    |26.247026                  |
|2021    |17.932675                  |
|2022    |12.775070                  |
--------------------------------------

----------------------------------------
|"SURVEY_TS_YEAR"  |"AVG(NPS_SCORE)"   |
----------------------------------------
|2023              |70.0               |
|2019              |89.78290366350068  |
|2020              |82.65104808877929  |
|2021              |70.78996630475477  |
|2022              |73.91523935002196  |
----------------------------------------

---------------------------------------
|"MONTH"  |"AVG(DAYS_TO_NEXT_ORDER)"  |
---------------------------------------
|4        |17.842293                  |
|5        |17.077169                  |
|6        |16.335789                  |
|7        |17.121073                  |
|9        |15.773666                  |
|10       |15.706048                

In [80]:
# Mean ORDER_TOTAL for each YEAR
AVG_SPEND_YEAR = NPS_COMPARE.group_by("YEAR").agg(F.avg("ORDER_TOTAL"))
AVG_SPEND_YEAR.show()

-------------------------------
|"YEAR"  |"AVG(ORDER_TOTAL)"  |
-------------------------------
|2020    |43.550487160064755  |
|2021    |42.299351424623104  |
|2022    |40.73021891392013   |
-------------------------------



In [86]:
# Count how many distinct CUSTOMER_ID values appear in NPS_COMPARE overall
NPS_COMPARE.select("CUSTOMER_ID").distinct().count()

15217

In [92]:
# Number of distinct customers with at least one row in each YEAR
CUSTOMER_BY_YEAR = NPS_COMPARE.group_by("YEAR").agg(F.count_distinct("CUSTOMER_ID"))
CUSTOMER_BY_YEAR.show()

------------------------------------------
|"YEAR"  |"COUNT(DISTINCT CUSTOMER_ID)"  |
------------------------------------------
|2020    |15159                          |
|2021    |15217                          |
|2022    |15217                          |
------------------------------------------



In [66]:
# Close the Snowpark session once the analysis is finished
session.close()