# UDTF Approach

In [7]:
from IPython.display import display, HTML, Image , Markdown
from snowflake.snowpark.session import Session
import snowflake.snowpark.types as T
import snowflake.snowpark.functions as F
import os ,configparser ,json

display(Markdown("### Initialization"))

# Source various helper functions
%run ../scripts/notebook_helpers.py

PROJECT_HOME_DIR = '../../'
config = get_config(PROJECT_HOME_DIR)
sp_session = connect_to_snowflake(PROJECT_HOME_DIR)

if(sp_session == None):
    raise Exception(f'Unable to connect to snowflake. Validate connection information ')

sp_session.sql(f''' use role {config['APP']['role']}; ''').collect()
sp_session.sql(f''' use database {config['APP']['database']}; ''').collect()
sp_session.sql(f''' use schema  {config['APP']['database']}.{config['APP']['schema']}; ''').collect()
sp_session.sql(f''' use warehouse {config['APP']['warehouse']}; ''').collect()

df = sp_session.sql('select current_user() ,current_role() ,current_database() ,current_schema();').to_pandas()
display(df)

### Initialization

Unnamed: 0,CURRENT_USER(),CURRENT_ROLE(),CURRENT_DATABASE(),CURRENT_SCHEMA()
0,VSEKAR,DEV_PCTRANSPERANCY_DEMO_RL,SFLK_PRICING_TRANSPERANCY,PUBLIC


In [8]:
# Inputs for the parsing run. Both values are interpolated into the
# innetwork_rates_segheader procedure call in a later cell.

# Path under the stage where the pricing file is located.
stage_path = "data_stg/price_transperancy"

# Name of the staged in-network rates file to process.
data_file = "2022_10_01_priority_health_HMO_in-network-rates.zip"

# Alternative inputs, kept for reference (currently disabled):
# data_file = '2022_07_01_priority_health_HMO_in-network-rates.json'
# data_file = '2022-11-01_cigna-health-life-insurance-company_national-oap_in-network-rates.json.gz'
# stage_path = 'ext_data_stg/data'

In [9]:
# Upload libraries to stage
lib_stage = config['APP']['lib_stage']

# Push the local handler sources to <database>.public.<lib_stage>/scripts.
upload_locallibraries_to_p_stage(sp_session ,'../python/in_network_udtf' ,config['APP']['database'] ,'public' ,lib_stage ,'scripts')

# Register the stored procedure whose handler lives in the staged
# negotiation_arrangements.py. The imports path is built from the same
# configuration values used for the upload above, so the procedure always
# references the file that was just uploaded (it was previously hard-coded
# to one specific database/stage).
# NOTE(review): the procedure name is unqualified, so it is created in the
# session's current schema, while a later cell calls it as
# <database>.public.innetwork_rates_segheader — confirm the current schema
# is always PUBLIC.
stmts = [
    f'''
        create or replace procedure innetwork_rates_segheader(
            batch_size integer ,stage_path varchar ,staged_data_flname varchar ,target_stage_for_segment_files varchar
            ,from_idx integer ,to_idx integer ,self_task_name varchar)
        returns variant
        language python
        runtime_version = '3.8'
        packages = ('snowflake-snowpark-python' ,'pandas', 'ijson' ,'simplejson')
        imports = ('@{config['APP']['database']}.public.{lib_stage}/scripts/negotiation_arrangements.py')
        handler = 'negotiation_arrangements.main'
    ;
    '''
]
for stmt in stmts:
    sp_session.sql(stmt).collect()

 Uploading library to stage: sflk_pricing_transperancy.public.lib_stg 
    ../python/in_network_udtf/negotiation_arrangements_dagbuilder.py => @lib_stg/scripts
    ../python/in_network_udtf/negotiation_arrangements.py => @lib_stg/scripts
    ../python/in_network_udtf/negotiation_arrangements_dagbuilder_tmp.py => @lib_stg/scripts


In [10]:
from datetime import datetime

# Wall-clock start marker (HH:MM:SS only; no elapsed duration is computed).
start_time = datetime.now().strftime("%H:%M:%S")
print(f'Started at: {start_time}')

database_name = config['APP']['database']
parsed_stage_name = config['APP']['data_parsed_stg']

# Statements executed in order:
#   1. (re)create the stage for parsed records,
#   2. resize the warehouse to XSMALL,
#   3. clear the task execution status table,
#   4. call the procedure with, positionally: batch_size=5, stage_path,
#      staged_data_flname=data_file, target_stage_for_segment_files,
#      from_idx=-1, to_idx=50, self_task_name='-'.
# NOTE(review): the warehouse name is hard-coded rather than taken from
# config['APP']['warehouse'] — confirm that is intentional.
# NOTE(review): the target stage argument ('ext_data_stg/data_pricing_parsed')
# differs from the stage created in statement 1 — confirm.
sql_stmts = [
    f''' create or replace stage {database_name}.public.{parsed_stage_name}
            directory = (enable=true)
            comment = 'used for holding parsed record.'; ''',

    # f''' alter warehouse dev_pctransperancy_demo_wh set max_concurrency_level = 8 ''',
    ''' alter warehouse dev_pctransperancy_demo_wh set warehouse_size = XSMALL; ''',
    f'''truncate table {database_name}.public.segment_task_execution_status; ''',
    f''' call {database_name}.public.innetwork_rates_segheader(
            5 ,'{stage_path}' ,'{data_file}' ,'ext_data_stg/data_pricing_parsed' 
            ,-1 ,50 ,'-');''',
]

# Echo each statement before running it so the cell output doubles as a log.
for stmt in sql_stmts:
    print(stmt)
    sp_session.sql(stmt).collect()

# Wall-clock end marker, same format as the start marker.
end_time = datetime.now().strftime("%H:%M:%S")
print(f'Ended at: {end_time}')

Started at: 16:50:42
 create or replace stage sflk_pricing_transperancy.public.data_parsed_stg
            directory = (enable=true)
            comment = 'used for holding parsed record.'; 
 alter warehouse dev_pctransperancy_demo_wh set warehouse_size = XSMALL; 
truncate table sflk_pricing_transperancy.public.segment_task_execution_status; 
 call sflk_pricing_transperancy.public.innetwork_rates_segheader(
            5 ,'data_stg/price_transperancy' ,'2022_10_01_priority_health_HMO_in-network-rates.zip' ,'ext_data_stg/data_pricing_parsed' 
            ,-1 ,50 ,'-');
Ended at: 16:51:32


---
### Segment task execution status

In [11]:

# Preview at most 10 rows of the task execution status table.
status_table_name = f'''{config['APP']['database']}.public.segment_task_execution_status'''
df = sp_session.table(status_table_name).limit(10).to_pandas()
display(df)

Unnamed: 0,DATA_FILE,TASK_NAME,ELAPSED,TASK_RET_STATUS,INSERTED_AT
0,,-,,"{""data_file"": ""2022_10_01_priority_health_HMO_...",2022-12-03 13:51:31.449


#### TODO : Perform some model analysis 
 - accuracy score
 - error rate calculation
 - confusion matrix
  ...

--- 
### Closeout

With that, we have finished this section of the demo setup.

In [12]:
# Closing the Snowpark session is left disabled here; uncomment to close it.
# sp_session.close()

# Completion marker for the notebook run.
print("Finished!!!")

Finished!!!
