In [141]:
# Import Snowflake modules
from snowflake.snowpark import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from snowflake.snowpark import Window

import pandas
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json

In [142]:
# Get account credentials from a json file
with open("data_scientist_auth.json") as f:
    data = json.load(f)
    username = data["username"]
    password = data["password"]
    account = data["account"]

# Specify connection parameters
connection_parameters = {
    "account": account,
    "user": username,
    "password": password,
    "role": "TASTY_BI",
    "warehouse": "TASTY_BI_WH",
    "database": "frostbyte_tasty_bytes",
    "schema": "analytics",
}

# Create Snowpark session
session = Session.builder.configs(connection_parameters).create()

In [143]:
SENTIMENT_DATA = session.table("frostbyte_tasty_bytes_setup_s.raw_survey.sentiment_survey_ds")

TRUCK_DATA = session.table("frostbyte_tasty_bytes_setup_s.raw_pos.truck")

In [144]:
SENTIMENT_DATA.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"SURVEY_ID"       |"SURVEY_TS"  |"TRUCK_ID"  |"COMMENTS"                                    |"NPS_SCORE"  |"NPS_CATEGORY"  |"OVERALL_SENTIMENT"  |"ASPECT_SENTIMENT"                           |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|60509-SL          |2023-01-25   |291         |                                              |60.0         |Passive         |0.0                  |[                                            |
|                  |             |            |                                              |             |                |                     |  {                                          |
|                  |          

In [145]:
TRUCK_DATA.show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"TRUCK_ID"  |"MENU_TYPE_ID"  |"PRIMARY_CITY"  |"REGION"    |"ISO_REGION"  |"COUNTRY"      |"ISO_COUNTRY_CODE"  |"FRANCHISE_FLAG"  |"YEAR"  |"MAKE"        |"MODEL"           |"EV_FLAG"  |"FRANCHISE_ID"  |"TRUCK_OPENING_DATE"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|3           |3               |San Mateo       |California  |CA            |United States  |US                  |1                 |2004    |Freightliner  |MT45 Utilimaster  |0          |2               |2021-10-01            |
|4           |4               |San Mateo       |California  |CA            |United State

In [146]:
SENTIMENT = TRUCK_DATA.join(SENTIMENT_DATA, SENTIMENT_DATA["TRUCK_ID"] == TRUCK_DATA["TRUCK_ID"], "inner")

In [147]:
SENTIMENT2 = SENTIMENT.dropna()

In [165]:
AVG_NPS_SCORE = SENTIMENT2.with_column("SURVEY_TS_YEAR", F.year("SURVEY_TS"))
AVG_NPS_SCORE = AVG_NPS_SCORE.with_column("SURVEY_TS_MONTH", F.month("SURVEY_TS"))
AVG_NPS_SCORE = AVG_NPS_SCORE.with_column("YEAR_MONTH", F.concat(F.col("SURVEY_TS_YEAR"), F.col("SURVEY_TS_MONTH")))
AVG_NPS_SCORE_YEAR = AVG_NPS_SCORE.groupBy(F.col("SURVEY_TS_YEAR")).agg(F.avg("NPS_SCORE"))
AVG_NPS_SCORE_MONTH = AVG_NPS_SCORE.groupBy(F.col("SURVEY_TS_MONTH")).agg(F.avg("NPS_SCORE"))
AVG_NPS_SCORE_YM = AVG_NPS_SCORE.groupBy(F.col("SURVEY_TS_YEAR"), F.col("SURVEY_TS_MONTH")).agg(F.avg("NPS_SCORE"))
AVG_NPS_SCORE_YEAR.show()
AVG_NPS_SCORE_MONTH.show()
AVG_NPS_SCORE_YM.show()

----------------------------------------
|"SURVEY_TS_YEAR"  |"AVG(NPS_SCORE)"   |
----------------------------------------
|2023              |70.0               |
|2019              |89.78290366350068  |
|2020              |82.65104808877929  |
|2021              |70.78996630475477  |
|2022              |73.91523935002196  |
----------------------------------------

-----------------------------------------
|"SURVEY_TS_MONTH"  |"AVG(NPS_SCORE)"   |
-----------------------------------------
|1                  |77.3211009174312   |
|5                  |73.8928210313448   |
|2                  |79.09722222222223  |
|7                  |73.42359767891683  |
|10                 |78.1470588235294   |
|3                  |75.85144927536231  |
|4                  |76.62244897959184  |
|8                  |74.87179487179488  |
|6                  |75.06467661691542  |
|11                 |78.18380743982495  |
-----------------------------------------

-----------------------------------------

In [149]:
NPS_COMPARE = session.table(name = "NPS_COMPARE")

In [150]:
NPS_COMPARE.show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"CUSTOMER_ID"  |"CITY"  |"DATE"      |"ORDER_TOTAL"  |"TOTAL_QUANTITY"  |"DAYS_TO_NEXT_ORDER"  |"DAYS_SINCE_PRIOR_ORDER"  |"AVG_DAYS_BETWEEN_PURCHASES"  |"MAX_DAYS_WITHOUT_ITEM"  |"MIN_DAYS_WITHOUT_ITEM"  |"FREQUENCY"  |"MONETARYVALUE"  |"TIME_OF_DAY"  |"AGE"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|154886         |London  |2020-04-04  |26.0000        |2                 |17                    |70                        |70                            |70                       |70                       |1

In [151]:
NPS_COMPARE = NPS_COMPARE.with_column("YEAR", F.year("DATE"))
NPS_COMPARE = NPS_COMPARE.with_column("MONTH", F.month("DATE"))
NPS_COMPARE = NPS_COMPARE.with_column("YEAR_MONTH", F.concat(F.col("YEAR"), F.col("MONTH")))
NPS_COMPARE.show()

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"CUSTOMER_ID"  |"CITY"  |"DATE"      |"ORDER_TOTAL"  |"TOTAL_QUANTITY"  |"DAYS_TO_NEXT_ORDER"  |"DAYS_SINCE_PRIOR_ORDER"  |"AVG_DAYS_BETWEEN_PURCHASES"  |"MAX_DAYS_WITHOUT_ITEM"  |"MIN_DAYS_WITHOUT_ITEM"  |"FREQUENCY"  |"MONETARYVALUE"  |"TIME_OF_DAY"  |"AGE"  |"YEAR"  |"MONTH"  |"YEAR_MONTH"  |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|172662         |Delhi   |2020-03-31  |12.0000        |2                 |60                    |8        

In [166]:
AVG_NPDR_YEAR = NPS_COMPARE.groupBy(F.col("YEAR")).agg(F.avg('"DAYS_TO_NEXT_ORDER"'))
AVG_NPDR_MONTH = NPS_COMPARE.groupBy(F.col("MONTH")).agg(F.avg('"DAYS_TO_NEXT_ORDER"'))
AVG_NPDR_YM = NPS_COMPARE.groupBy(F.col("YEAR"),F.col("MONTH")).agg(F.avg('"DAYS_TO_NEXT_ORDER"'))
AVG_NPDR_YEAR.show()
AVG_NPS_SCORE_YEAR.show()
AVG_NPDR_MONTH.show()
AVG_NPS_SCORE_MONTH.show()
AVG_NPDR_YM.show()
AVG_NPS_SCORE_YM.show()

--------------------------------------
|"YEAR"  |"AVG(DAYS_TO_NEXT_ORDER)"  |
--------------------------------------
|2019    |43.885027                  |
|2020    |24.773306                  |
|2022    |12.569290                  |
|2021    |16.843626                  |
--------------------------------------

----------------------------------------
|"SURVEY_TS_YEAR"  |"AVG(NPS_SCORE)"   |
----------------------------------------
|2023              |70.0               |
|2019              |89.78290366350068  |
|2020              |82.65104808877929  |
|2021              |70.78996630475477  |
|2022              |73.91523935002196  |
----------------------------------------

---------------------------------------
|"MONTH"  |"AVG(DAYS_TO_NEXT_ORDER)"  |
---------------------------------------
|7        |17.482664                  |
|1        |18.871947                  |
|5        |17.842351                  |
|4        |18.088972                  |
|9        |16.592366                 

In [153]:
AVG_SPEND_YEAR = NPS_COMPARE.group_by(F.col("YEAR")).agg(F.avg("ORDER_TOTAL"))
AVG_SPEND_YEAR.show()

-------------------------------
|"YEAR"  |"AVG(ORDER_TOTAL)"  |
-------------------------------
|2021    |41.1477210996       |
|2020    |41.4189057943       |
|2022    |40.2805051052       |
|2019    |37.1881657206       |
-------------------------------



In [155]:
CUSTOMER_BY_YEAR = NPS_COMPARE.group_by(F.col("YEAR")).agg(F.count_distinct("CUSTOMER_ID"))
CUSTOMER_BY_YEAR.show()

------------------------------------------
|"YEAR"  |"COUNT(DISTINCT CUSTOMER_ID)"  |
------------------------------------------
|2020    |221757                         |
|2021    |222540                         |
|2019    |100903                         |
|2022    |222540                         |
------------------------------------------



--------------------------------------
|"YEAR"  |"AVG(DAYS_TO_NEXT_ORDER)"  |
--------------------------------------
|2019    |43.885027                  |
|2020    |24.773306                  |
|2022    |12.569290                  |
|2021    |16.843626                  |
--------------------------------------

In [156]:
CUSTOMER_UNIQUE = NPS_COMPARE.agg(F.count_distinct("CUSTOMER_ID"))
CUSTOMER_UNIQUE.show()

---------------------------------
|"COUNT(DISTINCT CUSTOMER_ID)"  |
---------------------------------
|222540                         |
---------------------------------



In [170]:
AVG_SALES_YM = NPS_COMPARE.groupBy(F.col("YEAR"),F.col("MONTH")).agg(F.avg('"ORDER_TOTAL"'))
SUM_SALES_YM = NPS_COMPARE.groupBy(F.col("YEAR"),F.col("MONTH")).agg(F.sum('"ORDER_TOTAL"'))

AVG_SALES_YM.show()
SUM_SALES_YM.show()
AVG_NPDR_YM.show()

temp = SUM_SALES_YM.join(AVG_NPDR_YM, (SUM_SALES_YM["YEAR"] == AVG_NPDR_YM["YEAR"])&(SUM_SALES_YM["MONTH"] == AVG_NPDR_YM["MONTH"]))
temp.show()

-----------------------------------------
|"YEAR"  |"MONTH"  |"AVG(ORDER_TOTAL)"  |
-----------------------------------------
|2020    |6        |40.9930506501       |
|2022    |7        |40.2391587444       |
|2020    |12       |42.0042184028       |
|2021    |11       |41.1920454646       |
|2019    |4        |35.9820344130       |
|2021    |12       |41.1608334972       |
|2020    |3        |40.9749306408       |
|2020    |11       |41.9235477415       |
|2020    |10       |42.0323526472       |
|2021    |2        |40.9168982283       |
-----------------------------------------

-----------------------------------------
|"YEAR"  |"MONTH"  |"SUM(ORDER_TOTAL)"  |
-----------------------------------------
|2020    |6        |9401264.2500        |
|2020    |12       |13382712.0000       |
|2021    |11       |18665928.2500       |
|2019    |4        |213301.5000         |
|2020    |11       |12505542.7500       |
|2021    |12       |19045652.7500       |
|2022    |7        |21768942.2500