## s1 environment setup

In [0]:
### libraries
import pyspark
import os

import re
import numpy as np

from pyspark import sql 
from pyspark.sql import Window
from pyspark.sql import functions as f

In [0]:
# ------------- Handle to the Snowflake Spark connector's Utils class on the JVM
sfUtils = sc._jvm.net.snowflake.spark.snowflake.Utils

# ------------ Snowflake credentials
# The secret is fetched via dbutils.secrets; the variable is called `password`,
# but its value is supplied to the connector as the PEM private key below.
password = dbutils.secrets.get(scope="auea-kv-sbx-dxdtlprdct01", key="sfdbrsdskey")

# Connection options for the Snowflake Spark connector.
options = dict(
    sfUrl="vodafonenz_prod.australia-east.azure.snowflakecomputing.com/",
    sfUser="SVC_LAB_DS_DATABRICKS",
    # Literal backslash-n sequences in the secret are turned into real newlines.
    pem_private_key=password.replace("\\n", "\n"),
    sfDatabase="LAB_ML_STORE",
    sfSchema="SANDBOX",
    sfWarehouse="LAB_DS_WH",
)

### utility functions

In [0]:
%run "../utility_functions/spkdf_utils"

In [0]:
%run "../utility_functions/utility_functions"

In [0]:
%run "../utility_functions/misc"

### directories

In [0]:
# Root folder of the business feature store on the dev mount.
dir_data_parent = "/mnt/feature-store-dev/feature-store-business"

# Shared area and the project-specific user workspace, both under the root.
dir_data_parent_shared = os.path.join(dir_data_parent, "dev_shared")
dir_data_parent_users = os.path.join(
    dir_data_parent, "dev_users", "dev_sc", "2025q1_bfs"
)

In [0]:
# Raw data lives under the shared area; every other layer is under the user workspace.
dir_data_raw = os.path.join(dir_data_parent_shared, "d100_raw")

# Layer folders, in numeric order of their prefix.
# NOTE(review): staging and intermediate both use the "d200_" prefix — confirm this is intended.
dir_data_meta = os.path.join(dir_data_parent_users, "d000_meta")
dir_data_stg = os.path.join(dir_data_parent_users, "d200_staging")
dir_data_int = os.path.join(dir_data_parent_users, "d200_intermediate")
dir_data_prm = os.path.join(dir_data_parent_users, "d300_primary")
dir_data_fea = os.path.join(dir_data_parent_users, "d400_feature")
dir_data_mvmt = os.path.join(dir_data_parent_users, "d500_movement")
dir_data_serv = os.path.join(dir_data_parent_users, "d600_serving")
dir_data_tmp = os.path.join(dir_data_parent_users, "d999_tmp")

## s2 data import

In [0]:
# Global cycle calendar, read from a fixed path on the prod-lab mount.
df_global_calendar_meta = (
    spark.read
    .format('delta')
    .load('dbfs:/mnt/feature-store-prod-lab/d000_meta/d001_global_cycle_calendar')
)

# FSR field metadata, read from this project's meta directory.
df_fsr_field_meta = (
    spark.read
    .format("delta")
    .load(os.path.join(dir_data_meta, "d004_fsr_meta/fsr_field_meta"))
)

In [0]:
# # d_customer 
# df_raw_d_cust = spark.read.format('delta').load(os.path.join(dir_data_raw, 'd105_dp_martech/raw_martech_d_customer'))
# # d_service 
# df_raw_d_srvc = spark.read.format('delta').load(os.path.join(dir_data_raw, 'd105_dp_martech/raw_martech_d_service'))
# # d_sf_ent_contact 
# df_raw_d_sf_ent_contact = spark.read.format('delta').load(os.path.join(dir_data_raw, 'd105_dp_martech/raw_martech_d_ent_contact'))
# # d_sf_ent_account 
# df_raw_d_sf_ent_account = spark.read.format('delta').load(os.path.join(dir_data_raw, 'd105_dp_martech/raw_martech_d_ent_account'))
# # d_sf_ent_customer_datasource 
# df_raw_d_sf_ent_cust_ds = spark.read.format('delta').load(os.path.join(dir_data_raw, 'd105_dp_martech/raw_martech_d_ent_customer_datasource'))
# # f_sf_ent_customer_relationship 
# df_raw_f_cust = spark.read.format('delta').load(os.path.join(dir_data_raw, 'd105_dp_martech/raw_martech_f_ent_customer_relationship'))
# # f_sf_ent_customer_contact_relationship 
# df_raw_f_cust_contact = spark.read.format('delta').load(os.path.join(dir_data_raw, 'd105_dp_martech/raw_martech_f_ent_customer_contact_relationship'))

## s3 stage layer 01

In [0]:
# Stage-layer customer frame.
# Reads the customer-relationship fact table (f_cust) from the raw layer and joins it to
# the customer dimension (d_cust, on d_customer_key) and the enterprise account dimension
# (d_account, on d_sf_ent_account_key). Rows are kept only when
# d_cust.customer_status_name = 'Active' and f_cust.customer_id != 'Unknown'.
#
# NOTE(review): the WHERE clause filters on a d_cust column, so fact rows with no matching
# d_cust row are dropped — the first LEFT JOIN therefore behaves like an inner join.
# Confirm that is intended; otherwise the status filter belongs in the ON clause.
# NOTE(review): most selected columns are unqualified, so they must be unambiguous across
# the three tables — the table schemas are not visible here.
# The SQL lines commented out with "--" are still part of the f-string, so their
# {dir_data_raw} placeholder is interpolated but ignored by Spark SQL.
df_stag_cust_01 = spark.sql(
    f"""
        select 
            f_cust.account_source_id
            , f_cust.customer_id 
            , f_cust.record_start_date_time
            , f_cust.record_end_date_time
            , f_cust.record_update_date_time
            --, f_cust.current_record_flag
            , customer_type
            , customer_mkt_segment 
            , customer_status_name
            , d_account.customer_name
            , customer_first_name
            , customer_middle_name
            , customer_last_name 
            , trading_as_name
            , company_registration_number 
            , NZBN
            , number_of_employees
            , owner_email
            , owner_first_name
            , owner_last_name
            , revenue_last_month_amount 
            , total_monthly_revenue
            , total_revenue_last_12_mths
            , account_landscaping_completeness
            , landscaped_status_desc
            , sales_segment
            , service_segment
            , sales_segment_new
            , sb_unmanaged_flag
            , sf_ent_account_status_name
            , sbl_consumer_send_promotes_email_flag
            , sbl_consumer_send_promotes_outbound_call_flag
            , sbl_consumer_send_promotes_txt_flag
            , business_customer_send_promotes_flag
            , customer_email
            , converged_status
            , customer_primary_contact_id
            , customer_primary_contact_email
            , customer_birth_date
            , customer_activation_date
            , customer_default_bill_cycle_id
            , source_system_code
        from delta.`{dir_data_raw}/d105_dp_martech/raw_martech_f_ent_customer_relationship` f_cust
        left join delta.`{dir_data_raw}/d105_dp_martech/raw_martech_d_customer` d_cust
            on f_cust.d_customer_key = d_cust.d_customer_key
        left join delta.`{dir_data_raw}/d105_dp_martech/raw_martech_d_ent_account` d_account
            on f_cust.d_sf_ent_account_key = d_account.d_sf_ent_account_key 
        -- left join delta.`{dir_data_raw}/d105_dp_martech/raw_martech_d_ent_customer_datasource` d_sf_cust_ds
        --      on f_cust.d_sf_ent_customer_datasource_key = d_sf_cust_ds.d_sf_ent_customer_datasource_key 
        --      and d_sf_cust_ds.customer_active_flag = 'Y'
        where d_cust.customer_status_name = 'Active'
            and f_cust.customer_id != 'Unknown'  

    """
)

In [0]:
# Stage-layer service frame.
# Selects service, plan, contract and record-validity columns from the raw service
# dimension (d_srvc), keeping only rows where service_status_name = 'Active'.
# No join is performed here; customer_id is selected alongside service_id.
df_stag_srvc_01 = spark.sql(
    f"""
        select 
           service_id
           , customer_id
           , billing_account_number
           , service_type_name
           , paymt_meth_cd
           , mobile_number
           , plan_name
           , connection_activation_date
           , plan_start_date
           , product_subscription_date
           , proposition_installed_date_time
           , proposition_name
           , plan_status
           , contract_start_date
           , contract_end_date
           , contract_term
           , record_start_date_time
           , record_end_date_time
           , record_update_date_time
           --, current_record_flag
        from delta.`{dir_data_raw}/d105_dp_martech/raw_martech_d_service` d_srvc
        where d_srvc.service_status_name = 'Active'
    """
)

In [0]:
# Preview the first 10 rows of each staging frame (customer first, then service).
# NOTE(review): the previewed columns include emails, mobile numbers and birth dates —
# clear the cell output before sharing or committing the notebook.
for _df_preview in (df_stag_cust_01, df_stag_srvc_01):
    display(_df_preview.limit(10))

In [0]:
# Persist the staged customer frame as a Delta table in the staging layer.
# The target is derived from dir_data_stg (defined in the directories section) rather
# than repeating the absolute path, so moving the workspace only needs one change;
# the resulting path is the same as the previously hard-coded one.
# mode("overwrite") replaces whatever the table held before.
# NOTE(review): record_update_date_time looks like a timestamp; partitioning on a
# high-cardinality column creates many small partitions — confirm its cardinality.
(
    df_stag_cust_01
    .write
    .format("delta")
    .mode("overwrite")
    .partitionBy("record_update_date_time")
    .save(os.path.join(dir_data_stg, "staging_customer"))
)

In [0]:
# Persist the staged service frame as a Delta table in the staging layer.
# The target is derived from dir_data_stg (defined in the directories section) rather
# than repeating the absolute path, so moving the workspace only needs one change;
# the resulting path is the same as the previously hard-coded one.
# mode("overwrite") replaces whatever the table held before.
# NOTE(review): record_update_date_time looks like a timestamp; partitioning on a
# high-cardinality column creates many small partitions — confirm its cardinality.
(
    df_stag_srvc_01
    .write
    .format("delta")
    .mode("overwrite")
    .partitionBy("record_update_date_time")
    .save(os.path.join(dir_data_stg, "staging_service"))
)

### 