#### s01 set up 

In [0]:
import pyspark 
import os
from pyspark import sql
from pyspark.sql import functions as f
from pyspark.sql import Window
from datetime import datetime, timedelta

In [0]:
# Fetch the Snowflake service-account secret from the Databricks secret scope.
# Despite the variable name, the value is used below as the PEM private key.
password = dbutils.secrets.get(
    scope="auea-kv-sbx-dxdtlprdct01",
    key="sfdbrsdskey",
)

# Connection settings consumed by the Snowflake reader through .options(**options).
options = dict(
    sfUrl="vodafonenz_prod.australia-east.azure.snowflakecomputing.com",
    sfUser="SVC_LAB_DS_DATABRICKS",
    # Any literal backslash-n sequences in the secret become real newlines
    # before the value is handed over as the private key.
    pem_private_key=password.replace("\\n", "\n"),
    sfDatabase="LAB_ML_STORE",
    sfSchema="sandbox",
    sfWarehouse="LAB_DS_WH_SCALE",
)

In [0]:
# Credit-application rows from PROD_CREDISENSE.RAW.APPLICATION, restricted to
# records flagged as latest (_is_latest = 1) and not deleted (_is_deleted = 0).
# Adds a text description for the numeric plan type and NZ-local versions of
# the created/updated timestamps.
#
# The query text carries no trailing ';': the connector may embed the text as a
# subquery, where a statement terminator is not valid SQL.
#
# NOTE(review): the 3-argument CONVERT_TIMEZONE is fed a TIMESTAMP_LTZ value;
# confirm the session time zone makes the 'UTC' source assumption hold.
df_app = (
    spark
    .read
    .format("snowflake")
    .options(**options)
    .option(
        "query"
        , """ 
     SELECT 
        DATA_CUSTOMERREF as fs_cust_id
        , A.ID
        , A.DATA_APPLICATIONTYPE
        , A.DATA_PLANTYPE
        , case 
            when try_to_number(a.data_plantype) = 1 then 'Consumer plan only'
            when try_to_number(a.data_plantype) = 2 then 'Consumer BB New'
            when try_to_number(a.data_plantype) = 3 then 'Existing Plan Only'
            when try_to_number(a.data_plantype) = 4 then 'Consumer IFP new'
            when try_to_number(a.data_plantype) = 5 then 'Existing BB'
            when try_to_number(a.data_plantype) = 6 then 'Existing IFP'
            when try_to_number(a.data_plantype) = 7 then 'SME New'
            when try_to_number(a.data_plantype) = 8 then 'Enterprise New'
            when try_to_number(a.data_plantype) = 9 then 'Business Existing'
            else 'Other'
        end as plantype_desc
        , CONVERT_TIMEZONE('UTC', 'Pacific/Auckland', to_timestamp_ltz(A.DATA_CREATEDAT)) as application_createdat_nzt
        , CONVERT_TIMEZONE('UTC', 'Pacific/Auckland', to_timestamp_ltz(A.DATA_UPDATEDAT)) as data_updated_nzt
        , DATA_DECISION
        , DATA_DECISIONBAND
        , DATA_DECISIONDESCRIPTION
        FROM PROD_CREDISENSE.RAW.APPLICATION A
        WHERE _is_deleted = 0 
        AND _is_latest = 1
    """
    ).load()
)

#### s02 parameters

In [0]:
# Storage locations read in the load-data section (each is loaded as Delta).

# Write-off movement records.
dir_wo_mvnt = "/mnt/feature-store-prod-lab/d500_movement/d501_mobile_oa_consumer/mvmt_writeoff"

# Monthly level of AOD movement.
# NOTE(review): this path sits under a dev_users mount, unlike the other
# prod-lab locations here — confirm it is the intended source.
dir_aod_mvnt = "/mnt/ml-lab/dev_users/dev_sc/aod30_mvnt_acct_agg"

# Model scores for the 365-day service write-off prediction, model version_1.
dir_wo_score = "/mnt/ml-store-prod-lab/classification/d400_model_score/mobile_oa_consumer_srvc_writeoff_pred365d/model_version=version_1"

# Unit-base feature table.
dir_fea_unitbase = "/mnt/feature-store-prod-lab/d400_feature/d401_mobile_oa_consumer/fea_unit_base"


#### s03 load data

In [0]:
# Read each configured location as a Delta table; nothing is filtered here.
df_wo_mvnt = spark.read.load(dir_wo_mvnt, format='delta')
df_aod_mvnt = spark.read.load(dir_aod_mvnt, format='delta')
df_wo_score = spark.read.load(dir_wo_score, format='delta')
df_fea_unitbase = spark.read.load(dir_fea_unitbase, format='delta')

In [0]:
# Staging view of write-off movements: keep the account/customer keys, the
# movement date, amount and type, then drop exact duplicate rows.
# A filter on reporting_cycle_type == 'rolling cycle' was left disabled by the
# author and is not applied.
df_wo_mvnt_stag = (
    df_wo_mvnt
    .select(
        [
            'fs_acct_id',
            'fs_cust_id',
            'movement_date',
            'writeoff_amt',
            'movement_type',
        ]
    )
    .distinct()
)

In [0]:
# Staging view of AOD movements: keep the account/customer keys plus the
# movement date and type, then drop exact duplicate rows.
df_aod_mvnt_stag = (
    df_aod_mvnt
    .select(
        [
            'fs_acct_id',
            'fs_cust_id',
            'movement_date',
            'movement_type',
        ]
    )
    .distinct()
)

In [0]:
# Most recent application per customer.
# Despite the "_agg" suffix no aggregation happens here: rows are ranked within
# each fs_cust_id by creation time (newest first) and only rank 1 is kept.
# The helper columns decision_created_month (yyyyMM) and rnk stay in the output.
#
# Ordering by creation time alone leaves ties between applications created at
# the same instant, so the surviving row could change from one action to the
# next. data_updated_nzt and ID are added as tie-breakers to make the pick
# repeatable (ID is assumed to identify an application — TODO confirm).
#
# NOTE(review): rows with a null fs_cust_id share one window partition, so at
# most one of them survives the rnk == 1 filter — confirm that is acceptable.
df_app_agg = (
    df_app
    .withColumn('decision_created_month', f.date_format('application_createdat_nzt', 'yyyyMM'))
    .withColumn(
        'rnk',
        f.row_number().over(
            Window
            .partitionBy('fs_cust_id')
            .orderBy(
                f.desc('application_createdat_nzt'),
                f.desc('data_updated_nzt'),
                f.desc('ID'),
            )
        ),
    )
    .filter(f.col('rnk') == 1)
)

In [0]:
# Show the earliest and latest application creation timestamps (NZ time)
# among the per-customer latest applications.
display(
    df_app_agg.agg(
        f.min(f.col('application_createdat_nzt')),
        f.max(f.col('application_createdat_nzt')),
    )
)

In [0]:
# For 'Existing IFP' applications, count distinct customers per decision
# description, using the per-customer latest applications.
display(
    df_app_agg
    .where(f.col('PLANTYPE_DESC') == 'Existing IFP')
    .groupBy(['PLANTYPE_DESC', 'DATA_DECISIONDESCRIPTION'])
    .agg(f.countDistinct(f.col('fs_cust_id')))
)

In [0]:
# List approved 'Existing IFP' applications. This reads df_app directly, so it
# covers every loaded application row rather than one row per customer.
display(
    df_app.filter(
        (f.col('DATA_DECISIONDESCRIPTION') == 'Approved')
        & (f.col('PLANTYPE_DESC') == 'Existing IFP')
    )
)