![download.png](download.png)


## Import Packages

In [4]:
#Import Python packages
import pandas as pd
import plotly.express as px
import json
import sys
import cachetools
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
import numpy as np
import json

# Import Snowflake modules
from snowflake.snowpark import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T
from snowflake.snowpark import Window

## Connect to Snowflake

In [14]:
# Read the Snowflake login details from the local JSON credentials file.
with open("data_scientist_auth.json") as f:
    data = json.load(f)

# Pull out the three fields the connection needs (a missing key raises KeyError).
username, password, account = (
    data[key] for key in ("username", "password", "account")
)

# Connection settings: who logs in, and the role / warehouse / database / schema
# the session starts with.
connection_parameters = dict(
    account=account,
    user=username,
    password=password,
    role="tasty_data_scientist",
    warehouse="tasty_ds_wh",
    database="frostbyte_tasty_bytes",
    schema="analytics",
)

# Open the Snowpark session that every later cell queries through.
session_builder = Session.builder.configs(connection_parameters)
session = session_builder.create()

## Gathering Data as Snowpark DataFrame

In [15]:
# Fully qualified names of the two Snowflake objects this notebook reads.
ORDERS_VIEW = "frostbyte_tasty_bytes.analytics.orders_v"
CUSTOMER_LOYALTY_TABLE = "frostbyte_tasty_bytes.raw_customer.customer_loyalty"

# Snowpark DataFrame handles for the orders view and the customer loyalty table.
tasty_bytes_orders_v = session.table(ORDERS_VIEW)
tasty_bytes_customer_loyalty = session.table(CUSTOMER_LOYALTY_TABLE)

## Preview The Data

In [18]:
# Print a preview of the orders view to inspect its columns.
# NOTE(review): the view exposes FIRST_NAME / LAST_NAME / E_MAIL / PHONE_NUMBER
# columns — consider clearing this output before sharing the notebook.
tasty_bytes_orders_v.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"DATE"      |"ORDER_ID"  |"TRUCK_ID"  |"ORDER_TS"           |"ORDER_DETAIL_ID"  |"LINE_NUMBER"  |"TRUCK_BRAND_NAME"  |"MENU_TYPE"  |"PRIMARY_CITY"  |"REGION"                 |"COUNTRY"  |"FRANCHISE_FLAG"  |"FRANCHISE_ID"  |"FRANCHISEE_FIRST_NAME"  |"FRANCHISEE_LAST_NAME"  |"LOCATION_ID"  |"CUSTOMER_ID"  |"FIRST_NAME"  |"LAST_NAME"  |"E_MAIL"  |"PHONE_NUMBER"  |"CHILDREN_COUNT"  |"GENDER"  |"MARITAL_STATUS"  |"MENU_ITE

In [19]:
# Print a preview of the customer loyalty table to inspect its columns.
# NOTE(review): the rows shown include customer names and contact columns —
# consider clearing this output before sharing the notebook.
tasty_bytes_customer_loyalty.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"CUSTOMER_ID"  |"FIRST_NAME"  |"LAST_NAME"  |"CITY"          |"COUNTRY"  |"POSTAL_CODE"  |"PREFERRED_LANGUAGE"  |"GENDER"     |"FAVOURITE_BRAND"  |"MARITAL_STATUS"  |"CHILDREN_COUNT"  |"SIGN_UP_DATE"  |"BIRTHDAY_DATE"  |"E_MAIL"                  |"PHONE_NUMBER"  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|1              |Rodolfo       |Tucker       |Hamburg         |Germany    |21129          |German                |Female       |NULL               |Single            |0                 |2019-02-19      

## Data Exploration

In [24]:
# Count loyalty-table rows per city in Snowflake, pull the small result into
# pandas, and rank the cities from most to fewest rows.
# (Presumably one row per customer, so this is the customer count — TODO confirm.)
customers_per_city = (
    tasty_bytes_customer_loyalty
    .group_by(F.col('CITY'))
    .count()
    .to_pandas()
)
customers_per_city.sort_values(by='COUNT', ascending=False)

Unnamed: 0,CITY,COUNT
22,San Mateo,10613
18,Toronto,10476
19,London,10214
23,Tokyo,10117
11,New York City,10016
10,Montreal,9973
12,Warsaw,9299
24,Boston,9261
20,Denver,9122
29,Mumbai,8886


In [27]:
# Count distinct order ids per truck city in Snowflake, then rank the cities
# locally from most to fewest orders.
orders_per_city = (
    tasty_bytes_orders_v
    .group_by(F.col('PRIMARY_CITY'))
    .agg(F.countDistinct('ORDER_ID'))
    .to_pandas()
)
orders_per_city.sort_values(by='COUNT(DISTINCT ORDER_ID)', ascending=False)

Unnamed: 0,PRIMARY_CITY,COUNT(DISTINCT ORDER_ID)
22,Seoul,15272135
11,Cape Town,13609418
6,Sao Paulo,13316682
13,Delhi,13076336
23,New York City,12637979
17,Mumbai,12347809
16,Sydney,12207198
14,Tokyo,11762202
4,Madrid,11668304
21,Berlin,11342624


In the code cells above, we looked at the number of unique customers and the number of transactions by city. San Mateo has the highest number of unique customers but the lowest number of transactions, which could suggest that it has the highest churn rate.

In [28]:
# Most recent order date per city: ORDER_TS is cast to a date and the maximum is
# taken within each PRIMARY_CITY group. The result is small, so it is pulled
# into pandas for the date arithmetic in the next cells.
latest_order_date_by_city = (
    tasty_bytes_orders_v
    .group_by(F.col('PRIMARY_CITY'))
    .max(F.to_date(F.col('ORDER_TS')))
)
last_tran_city = latest_order_date_by_city.to_pandas()



In [30]:
import datetime

In [32]:
# Look-back windows, in days, counted back from each city's latest order date.
CHURN_WINDOW_DAYS = 14
CUSTOMER_COUNT_WINDOW_DAYS = 44

# Derive both cut-off dates from the same "latest order date" column.
latest_order_date = last_tran_city['MAX(TO_DATE(ORDER_TS))']
last_tran_city['CHURN_DATE'] = latest_order_date - datetime.timedelta(days=CHURN_WINDOW_DAYS)
last_tran_city['CUSTOMER_COUNT_DATE'] = latest_order_date - datetime.timedelta(days=CUSTOMER_COUNT_WINDOW_DAYS)

In [33]:
# Display the per-city latest order date together with the two derived cut-off dates.
last_tran_city

Unnamed: 0,PRIMARY_CITY,MAX(TO_DATE(ORDER_TS)),CHURN_DATE,CUSTOMER_COUNT_DATE
0,London,2022-11-01,2022-10-18,2022-09-18
1,Vancouver,2022-11-01,2022-10-18,2022-09-18
2,Warsaw,2022-11-01,2022-10-18,2022-09-18
3,Seattle,2022-11-01,2022-10-18,2022-09-18
4,Toronto,2022-11-01,2022-10-18,2022-09-18
5,Denver,2022-11-01,2022-10-18,2022-09-18
6,Madrid,2022-11-01,2022-10-18,2022-09-18
7,Sao Paulo,2022-11-01,2022-10-18,2022-09-18
8,Hamburg,2022-11-01,2022-10-18,2022-09-18
9,Melbourne,2022-11-01,2022-10-18,2022-09-18


In [37]:
# Send the pandas frame of per-city cut-off dates back to Snowflake as a Snowpark
# DataFrame so it can be joined against the orders view.
last_tran_city_sdf=session.createDataFrame(last_tran_city)
# Preview the uploaded frame.
last_tran_city_sdf.show()

------------------------------------------------------------------------------------
|"PRIMARY_CITY"  |"MAX(TO_DATE(ORDER_TS))"  |"CHURN_DATE"  |"CUSTOMER_COUNT_DATE"  |
------------------------------------------------------------------------------------
|London          |2022-11-01                |2022-10-18    |2022-09-18             |
|Vancouver       |2022-11-01                |2022-10-18    |2022-09-18             |
|Warsaw          |2022-11-01                |2022-10-18    |2022-09-18             |
|Seattle         |2022-11-01                |2022-10-18    |2022-09-18             |
|Toronto         |2022-11-01                |2022-10-18    |2022-09-18             |
|Denver          |2022-11-01                |2022-10-18    |2022-09-18             |
|Madrid          |2022-11-01                |2022-10-18    |2022-09-18             |
|Sao Paulo       |2022-11-01                |2022-10-18    |2022-09-18             |
|Hamburg         |2022-11-01                |2022-10-18    |2022-

In [39]:
# Attach each city's cut-off dates to every order row of that city.
last_tran_city_sdf_join = last_tran_city_sdf.join(
    tasty_bytes_orders_v, on="PRIMARY_CITY", how="inner"
)

# Preview the joined rows.
last_tran_city_sdf_join.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"PRIMARY_CITY"  |"MAX(TO_DATE(ORDER_TS))"  |"CHURN_DATE"  |"CUSTOMER_COUNT_DATE"  |"DATE"      |"ORDER_ID"  |"TRUCK_ID"  |"ORDER_TS"           |"ORDER_DETAIL_ID"  |"LINE_NUMBER"  |"TRUCK_BRAND_NAME"  |"MENU_TYPE"     |"REGION"                 |"COUNTRY"  |"FRANCHISE_FLAG"  |"FRANCHISE_ID"  |"FRANCHISEE_FIRST_NAME"  |"FRANCHISEE_LAST_NAME"  |"LOCATION

In [41]:
# Keep only orders dated strictly after the city's CUSTOMER_COUNT_DATE cut-off.
ordered_after_count_date = (
    last_tran_city_sdf_join["DATE"] > last_tran_city_sdf_join["CUSTOMER_COUNT_DATE"]
)
df = last_tran_city_sdf_join.filter(ordered_after_count_date)
df.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"PRIMARY_CITY"  |"MAX(TO_DATE(ORDER_TS))"  |"CHURN_DATE"  |"CUSTOMER_COUNT_DATE"  |"DATE"      |"ORDER_ID"  |"TRUCK_ID"  |"ORDER_TS"           |"ORDER_DETAIL_ID"  |"LINE_NUMBER"  |"TRUCK_BRAND_NAME"  |"MENU_TYPE"     |"REGION"                 |"COUNTRY"  |"FRANCHISE_FLAG"  |"FRANCHISE_ID"  |"FRANCHISEE_FIRST_NAME"  |"FRANCHISEE_LAST_NAME"  |"LOCATION

In [42]:
# From the already-filtered orders in `df`, keep only those dated strictly after
# the city's CHURN_DATE cut-off.
ordered_after_churn_date = (
    last_tran_city_sdf_join["DATE"] > last_tran_city_sdf_join["CHURN_DATE"]
)
df2 = df.filter(ordered_after_churn_date)

In [46]:
def _distinct_customers_by_city(orders, count_alias):
    """Return one row per PRIMARY_CITY with the distinct CUSTOMER_ID count, named `count_alias`."""
    distinct_customers = F.countDistinct(F.col("CUSTOMER_ID")).alias(count_alias)
    return orders.groupBy("PRIMARY_CITY").agg(distinct_customers)


# COUNT_BEF: distinct customers with an order after CUSTOMER_COUNT_DATE (rows of `df`).
cust_bef = _distinct_customers_by_city(df, 'COUNT_BEF')
# COUNT_AFT: distinct customers with an order after CHURN_DATE (rows of `df2`).
cust_aft = _distinct_customers_by_city(df2, 'COUNT_AFT')

# Print up to 400 rows of the per-city COUNT_BEF table.
cust_bef.show(400)

--------------------------------
|"PRIMARY_CITY"  |"COUNT_BEF"  |
--------------------------------
|Warsaw          |9153         |
|London          |9954         |
|Vancouver       |7663         |
|Seattle         |7169         |
|Cape Town       |4670         |
|Delhi           |6708         |
|Tokyo           |9931         |
|Barcelona       |3462         |
|Rio de Janeiro  |5683         |
|Melbourne       |6752         |
|Hamburg         |6198         |
|Manchester      |8324         |
|Boston          |8922         |
|Montreal        |9709         |
|Madrid          |4819         |
|Toronto         |10238        |
|Sao Paulo       |4832         |
|Denver          |8988         |
|New York City   |9853         |
|Krakow          |4581         |
|Berlin          |3608         |
|Paris           |7235         |
|Stockholm       |6053         |
|Seoul           |7756         |
|Cairo           |5556         |
|Munich          |5531         |
|Sydney          |8000         |
|Nice     

In [49]:
# Put both per-city customer counts side by side.
calculator = cust_aft.join(cust_bef, on="PRIMARY_CITY", how="inner")

# The result is one row per city, so finish the arithmetic locally in pandas.
calc = calculator.to_pandas()

# Churn rate (%): share of the COUNT_BEF customers that are not in COUNT_AFT.
lost_customers = calc["COUNT_BEF"] - calc["COUNT_AFT"]
calc["CHURN_RATE"] = (lost_customers / calc["COUNT_BEF"]) * 100

# Rank the cities from highest to lowest churn rate.
calc.sort_values(by='CHURN_RATE', ascending=False)

Unnamed: 0,PRIMARY_CITY,COUNT_AFT,COUNT_BEF,CHURN_RATE
8,Melbourne,4016,6752,40.521327
1,London,6048,9954,39.240506
15,San Mateo,6235,9953,37.355571
16,Nice,4395,6817,35.528825
5,Madrid,3135,4819,34.945009
13,Barcelona,2262,3462,34.662045
23,Cairo,3639,5556,34.50324
14,Rio de Janeiro,3778,5683,33.521028
17,Sydney,5422,8000,32.225
10,Cape Town,3206,4670,31.349036
