# Project Path

## Setup

In [1]:
# Import packages
import setup
import pandas as pd
from ds_common_utils.aux.io.snowflake_tools import SnowflakeTools

In [2]:
# Open a token-authenticated Snowflake connection, then hand it to the
# project `setup` module so it can initialise the analysis dates.
# NOTE(review): the user id is hard-coded here — consider sourcing it from
# configuration or the environment.
snowflake_params = {
    'method': 'token',
    'user': '250807',
    'role': 'INSIGHT_ANALYST_MERCH_DE_GENERAL_PRD',
    'warehouse': 'INSIGHT_ANALYST_WH',
}
con = SnowflakeTools().get_snowflake_ctx(**snowflake_params)

# Kept as the final expression so the cell still displays its return value.
setup.set_dates(con, setup.dates)

snowflake_tools - 2025-06-24 09:59:41.307925+10:00 - Generating new token with 'INSIGHT_ANALYST_MERCH_DE_GENERAL_PRD' role and 'SESSION:ROLE-ANY' scope...
snowflake_tools - 2025-06-24 10:00:15.950292+10:00 - Saved token with '2025-06-24 14:00:15+10:00' expiry...


Unnamed: 0,START_DATE,END_DATE,PP_START_DATE
0,2024-05-01,2025-04-30,2022-10-01


In [3]:
# Possible granularities for purchase paths, ordered coarsest to finest.
# For every granularity the inner mapping holds the prefix that is substituted
# into the SQL template for each finer level: '' keeps that level's lines
# active, while '-- ' turns them into SQL comments.
pft_level_order = ['dept', 'sub_dept', 'class', 'sub_class']

granularity = {
    level: {
        finer_level: '' if position <= depth else '-- '
        for position, finer_level in enumerate(pft_level_order[1:], start=1)
    }
    for depth, level in enumerate(pft_level_order)
}

## Consumer AU

In [4]:
# Get consumer (AU) purchase-path data at each granularity.
# Every result is cached as a parquet file; Snowflake is only queried when the
# cache file for that granularity does not exist yet.
consumer_data = {}

for pft_level in granularity:
    # Set parquet path
    parquet_path = 'parquets/df_purchase_path_consumer_AU_pest_control_' + pft_level + '_20_limit.parquet'
    # Get data
    try:
        consumer_data[pft_level] = pd.read_parquet(parquet_path)
    except FileNotFoundError:
        # Only a missing cache file triggers a re-query. Any other failure
        # (unreadable parquet, missing parquet engine, user interrupt) is
        # raised instead of being silently swallowed.
        with open('sql/project-path-consumer.sql', 'r') as query:
            query_template = query.read()
        # Comment out necessary lines in query depending on granularity
        pp_query = query_template.format(
            pp_sub_dept=granularity[pft_level]['sub_dept'],
            pp_class=granularity[pft_level]['class'],
            pp_sub_class=granularity[pft_level]['sub_class'],
        )
        # The SQL file is already closed here, so it is not held open for the
        # duration of the query.
        df = pd.read_sql_query(pp_query, con)
        consumer_data[pft_level] = df
        df.to_parquet(parquet_path)

In [5]:
# Display the loaded data: a dict mapping each granularity level to its DataFrame.
consumer_data

{'dept':                   ITEM_CATEGORY_NAME          ITEM_DEPARTMENT_NAME  \
 0                 200 HOME DECORATOR                  300 FLOORING   
 1          200 BUILDERS AND PLUMBING        300 BUILDING MATERIALS   
 2                          200 TOOLS              300 TOOL STORAGE   
 3                          200 TOOLS                300 HAND TOOLS   
 4                 200 DESIGN AND ORG                  301 BATHROOM   
 ...                              ...                           ...   
 2513  200 GARDEN MAINT AND CONSTRUCT              300 POWER GARDEN   
 2514                   200 GARDENING              300 PEST CONTROL   
 2515  200 GARDEN MAINT AND CONSTRUCT                  300 WATERING   
 2516              200 OUTDOOR LIVING                 300 FURNITURE   
 2517              200 OUTDOOR LIVING  301 POOL SPA SHEDS AND SHADE   
 
       WEEKS_SINCE_PURCHASE  IRRIGATION_PROJECTS_CONSUMER  CUSTOMERS  \
 0                      -26                       3449343       31

## Commercial NZ

In [7]:
# Get commercial (NZ) purchase-path data at each granularity.
# Every result is cached as a parquet file; Snowflake is only queried when the
# cache file for that granularity does not exist yet.
commercial_data = {}

for pft_level in granularity:
    # Set parquet path
    parquet_path = 'parquets/df_purchase_path_commercial_NZ_pest_control_' + pft_level + '_20_limit.parquet'
    # Get data
    try:
        commercial_data[pft_level] = pd.read_parquet(parquet_path)
    except FileNotFoundError:
        # Only a missing cache file triggers a re-query. Any other failure
        # (unreadable parquet, missing parquet engine, user interrupt) is
        # raised instead of being silently swallowed.
        with open('sql/project-path-commercial.sql', 'r') as query:
            query_template = query.read()
        # Comment out necessary lines in query depending on granularity
        pp_query = query_template.format(
            pp_sub_dept=granularity[pft_level]['sub_dept'],
            pp_class=granularity[pft_level]['class'],
            pp_sub_class=granularity[pft_level]['sub_class'],
        )
        # The SQL file is already closed here, so it is not held open for the
        # duration of the query.
        df = pd.read_sql_query(pp_query, con)
        commercial_data[pft_level] = df
        df.to_parquet(parquet_path)

## Results

In [10]:
# Export the raw consumer data to one workbook, one sheet per granularity level.
with pd.ExcelWriter("data/consumer_purchase_path_raw_data.xlsx") as writer:
    for sheet_level, raw_frame in consumer_data.items():
        raw_frame.to_excel(writer, sheet_name=sheet_level)

In [11]:
# Export the raw commercial data to one workbook, one sheet per granularity level.
with pd.ExcelWriter("data/commercial_purchase_path_raw_data.xlsx") as writer:
    for sheet_level, raw_frame in commercial_data.items():
        raw_frame.to_excel(writer, sheet_name=sheet_level)

In [12]:
# Pivot consumer results: for each granularity, keep rows with SALES_RANK <= 10
# and lay the ranks out as family-tree levels (rows) by WEEKS_SINCE_PURCHASE (columns).
df_consumer_results = {}
family_tree_levels = ['ITEM_CATEGORY_NAME', 'ITEM_DEPARTMENT_NAME', 'ITEM_SUB_DEPARTMENT_NAME', 'ITEM_CLASS_NAME', 'ITEM_SUB_CLASS_NAME']

for depth, pft_level in enumerate(granularity):
    level_frame = consumer_data[pft_level]
    top_ranked = level_frame.loc[level_frame['SALES_RANK'] <= 10, :]

    # Relies on `granularity` being ordered coarsest to finest: each step down
    # adds one more family-tree level to the row index (category + department first).
    rank_table = top_ranked.pivot_table(
        index=family_tree_levels[:depth + 2],
        columns='WEEKS_SINCE_PURCHASE',
        values='SALES_RANK',
    )
    # Nullable integer dtype so cells without a rank can stay missing.
    rank_table = rank_table.astype('Int64')

    # Shade every week column using the reversed 'Greens' colormap.
    styled_table = rank_table.style.background_gradient(
        subset=pd.IndexSlice[:, rank_table.columns],
        cmap='Greens_r',
    )
    df_consumer_results[pft_level] = styled_table

In [13]:
# Pivot commercial results: for each granularity, keep rows with SALES_RANK <= 10
# and lay the ranks out as family-tree levels (rows) by WEEKS_SINCE_PURCHASE (columns).
df_commercial_results = {}
family_tree_levels = ['ITEM_CATEGORY_NAME', 'ITEM_DEPARTMENT_NAME', 'ITEM_SUB_DEPARTMENT_NAME', 'ITEM_CLASS_NAME', 'ITEM_SUB_CLASS_NAME']

for depth, pft_level in enumerate(granularity):
    level_frame = commercial_data[pft_level]
    top_ranked = level_frame.loc[level_frame['SALES_RANK'] <= 10, :]

    # Relies on `granularity` being ordered coarsest to finest: each step down
    # adds one more family-tree level to the row index (category + department first).
    rank_table = top_ranked.pivot_table(
        index=family_tree_levels[:depth + 2],
        columns='WEEKS_SINCE_PURCHASE',
        values='SALES_RANK',
    )
    # Nullable integer dtype so cells without a rank can stay missing.
    rank_table = rank_table.astype('Int64')

    # Shade every week column using the reversed 'Greens' colormap.
    styled_table = rank_table.style.background_gradient(
        subset=pd.IndexSlice[:, rank_table.columns],
        cmap='Greens_r',
    )
    df_commercial_results[pft_level] = styled_table

In [14]:
# Write the styled consumer rank tables to excel, one sheet per granularity level.
with pd.ExcelWriter("data/purchase_path_consumer_pest_control.xlsx") as writer:
    for sheet_level, styled_table in df_consumer_results.items():
        styled_table.to_excel(writer, sheet_name=sheet_level)

In [15]:
# Write the styled commercial rank tables to excel, one sheet per granularity level.
with pd.ExcelWriter("data/purchase_path_commercial_pest_control.xlsx") as writer:
    for sheet_level, styled_table in df_commercial_results.items():
        styled_table.to_excel(writer, sheet_name=sheet_level)