<h1 style="text-align:center">Notebook Optimization/Simulation Pipeline</h1>

In [None]:
#!pip install --upgrade pip --index-url https://nexus-ha.cvshealth.com:9443/repository/pypi-proxy/simple
#!pip install --upgrade -r requirements.txt --index-url https://nexus-ha.cvshealth.com:9443/repository/pypi-proxy/simple
#!pip install teradata
#!sudo apt-get install unixodbc #in terminal

In [None]:
### Setup

In [None]:
import os
import socket
import json
import jinja2 as jj2
import datetime as dt


In [None]:
 # File Path Definitions
# Root of the checked-out clientpharmacymacoptimization repository.
repo_head = '/home/jupyter/clientpharmacymacoptimization'
# NOTE(review): there is no leading slash, so this is a relative path. After the
# os.chdir(repo_head) below it resolves underneath the repository directory.
# Confirm whether '/home/jupyter/Output' was intended.
output_path = "home/jupyter/Output"
# Shared input location (a gs:// bucket path).
input_path = "gs://pbm-mac-lp-prod-ai-bucket/shared_input"

# JSON dump of custom_params; passed to the scripts below via --custom-args-json.
custom_params_json_path = os.path.join(output_path, 'custom_params.json')

# set/reset working directory to the clientpharmacymacoptimization repo location
os.chdir(repo_head)
# Absolute path of the working directory after the chdir above.
program_dir = os.path.abspath(os.curdir)

### Parameters

In [None]:
# set custom parameters here
# (these will be passed to the parameters template)
# Several values are strings holding Python source (e.g. LAST_DATA, GO_LIVE, DATA_ID).
# They are handed to the Jinja template render unchanged; presumably the template pastes
# them into CPMO_parameters.py as code -- confirm against CPMO_parameters_TEMPLATE.py.
client_lob = 'CMK' # Set LOB type
custom_params = {
    # TIMESTAMP and USER are wrapped in literal double quotes so the rendered value is a quoted string.
    "TIMESTAMP": '\"'+ dt.datetime.now().strftime('%Y-%m-%d_%H%M%S%f') +'\"',
    "USER": '\"'+ socket.gethostname() +'\"',
    "FULL_YEAR": False,
    # A list literal stored as a string; the client-params pull cell expects exactly one ID in it.
    "CUSTOMER_ID": "['4588']",
    'CLIENT_NAME_TABLEAU':'SoGA',
    "DATA_ID": "'CHANGE_{}_DATE'.format(CUSTOMER_ID[0])",
    "BQ_INPUT_PROJECT_ID": "anbc-prod",
    "BQ_OUTPUT_DATASET": "ds_development_lp",
    "BQ_INPUT_DATASET_DS_PRO_LP": "fdl_gdp_ae_ds_pro_lp_share_ent_prod",
    # Input/output locations come from the path definitions cell.
    "PROGRAM_INPUT_PATH": input_path,
    "PROGRAM_OUTPUT_PATH": output_path,
    "READ_FROM_BQ": True,
    "WRITE_TO_BQ": False,
    "UNC_OPT": False,
    "DROP_TABLES": False,
    "CLIENT_TYPE": "COMMERCIAL",
    "LAST_DATA": "dt.datetime.strptime('07/30/2022', '%m/%d/%Y')",
    "GO_LIVE":"dt.datetime.strptime('08/30/2022', '%m/%d/%Y')",
    "RAW_GOODRX": "'GoodRx price Jan file 04192021.xlsx'",
    "FLOOR_GPI_LIST": "'20201209_Floor_GPIs.csv'",
    "GOODRX_OPT": False,
    "FLOOR_PRICE": True,
    "UNC_ADJUST": True,
    "DATA_START_DAY": '2022-01-01',
    "TIERED_PRICE_LIM": True,
    "CLIENT_LOB": client_lob,
    # Pharmacy code lists keyed by 'GNRC' and 'BRND'.
    "SMALL_CAPPED_PHARMACY_LIST": {
      'GNRC': ['MCHOICE', 'THF', 'AMZ', 'HYV', 'KIN', 'ABS', 'PBX', 'AHD', 'GIE', 'MJR', 'GEN', 'TPS'],
      'BRND': ['MCHOICE', 'ABS', 'AHD', 'PBX', 'MJR', 'WGS', 'SMC', 'CST', 'KIN', 'GIE', 'HYV', 'TPM', 'SMR', 'ARX', 'WIS', 'GEN', 'BGY', 'DDM', 'MCY', 'MGM', 'PMA', 'GUA', 'FVW', 'BRI', 'AMZ', 'THF', 'TPS']
    },
    "GENERIC_OPT" : True, 
    "BRAND_OPT" : False,
    # These two flags are combined into the `uclclient` argument of the audit trail setup.
    "UCL_CLIENT": False,
    "TRUECOST_CLIENT": False,
}

### Automatic client parameters pull

In [None]:
# Pull client parameters from the clnt_params table and merge them with custom_params.
# Change the condition below to "if False:" to skip the pull and use custom_params only.
# NOTE that custom_params OVERRIDES any settings pulled from this table!
if True:
    from google.cloud import bigquery

    if ',' in custom_params['CUSTOMER_ID']:
        raise RuntimeError("Notebook production run should only run one client at a time. Please make sure "
                           "that the CUSTOMER_ID in custom_params contains only one client ID.")
    # CUSTOMER_ID is a list literal stored as a string, e.g. "['4588']" or '["4588"]'.
    # Strip the brackets and either quote style; splitting on "'" alone raised an
    # IndexError for the double-quoted form before its fallback could run.
    customer_id = custom_params['CUSTOMER_ID'].strip().strip('[]').strip().strip('\'"')

    if client_lob == 'AETNA':
        # Same padding trick as below, for the 5-character ID window of the Aetna table.
        customer_id_str = "'" + f"{customer_id}']"[:5].replace("'", r"\'") + "'"
        query = f"""SELECT * FROM anbc-prod.fdl_gdp_ae_ent_enrv_prod.gms_aetna_clnt_params
        where substr(customer_id,3,5) in ({customer_id_str})"""
    else:
        # this line of code ensures that <4-digit customer IDs can be matched in this query
        customer_id_str = "'" + f"{customer_id}']"[:4].replace("'", r"\'") + "'"
        query = f"""SELECT * FROM pbm-mac-lp-prod-ai.pricing_management.clnt_params
        where substr(customer_id,3,4) in ({customer_id_str})"""

    bq_client = bigquery.Client()
    query_job = bq_client.query(query)
    records = [dict(row) for row in query_job]
    if len(records) == 0:
        raise RuntimeError(f"No client params found in clnt_params table for client {customer_id}!")
    # The original repeated the "== 0" test here, so this error could never be raised.
    if len(records) > 1:
        raise RuntimeError(f"Multiple rows of client params found in clnt_params table for client {customer_id}!")

    # Drop the first and last character of these two string columns.
    # presumably the table stores them wrapped in quotes -- confirm against the table contents
    records[0]['CLIENT_NAME_TABLEAU'] = records[0]['CLIENT_NAME_TABLEAU'][1:-1]
    records[0]['DATA_START_DAY'] = records[0]['DATA_START_DAY'][1:-1]
    if 'WTW' in records[0]['CLIENT_NAME_TABLEAU']:
        # Clients whose Tableau name contains 'WTW' get a flat 24% factor instead of tiered limits.
        records[0]['TIERED_PRICE_LIM'] = False
        records[0]['GPI_UP_FAC'] = .24
        records[0]['RUN_TYPE_TABLEAU'] = 'Flat 24%'

    # custom_params wins on any key present in both.
    records[0].update(custom_params)
    custom_params = records[0]

### Audit trail setup

In [None]:
# Audit trail functionality will only be fully operational if output_path is a gs bucket.
# An AT_RUN_ID can still be generated for local files, however.

from GER_LP_Code.audit_trail_utils import AuditTrail
git_branch = !git rev-parse --abbrev-ref HEAD
git_hash = !git rev-parse --short HEAD
git_branch, git_hash = git_branch[0], git_hash[0]
algo_version = 'LP'
version_iteration = '0' # change as needed
version_type = f'WIP-{socket.gethostname()}'   # Use 'PROD' only for official versions
project_name = 'pbm-mac-lp-prod-ai'
gcp_registry_name = 'us.gcr.io'
base_name = 'pbm_base'
script_run_name = 'pbm_script_run'
opt_name = 'pbm_opt'

audit_obj = AuditTrail(
        git_branch = git_branch,
        git_hash = git_hash, 
        algo_version = algo_version,
        version_iteration = version_iteration, # change as needed
        version_type = version_type,    # Use 'PROD' only for official versions
        project_name = 'pbm-mac-lp-prod-ai',
        bucket_name = 'pbm-mac-lp-prod-ai-bucket',
        audit_trail_folder = 'Audit_Trail_Data',
        audit_trail_dataset_name = 'pricing_management', 
        audit_trail_table_name = 'AT_Run_ID',
        gcp_registry_name = 'us.gcr.io',
        base_name = 'pbm_base',
        script_run_name = 'pbm_script_run',
        opt_name = 'pbm_opt', 
        client_lob = client_lob,
        cmpas = 'False'#Set 'True' as a string for CMPAS run,
        uclclient= custom_params.get('TRUECOST_CLIENT', False) | custom_params.get('UCL_CLIENT', False),
)
AT_RUN_ID = audit_obj.get_latest_run_id(table_name = 'AT_Run_ID')
print("Latest RunID = ", AT_RUN_ID)

In [None]:
# Store the run ID returned by audit_obj.get_latest_run_id so it is rendered into the
# parameters file and dumped to the custom params JSON along with the other settings.
custom_params['AT_RUN_ID'] = AT_RUN_ID

### Directory Support

In [None]:
# Ensure the output root exists, then each of its standard sub-folders.
os.makedirs(output_path, exist_ok=True)
for d in ('Output', 'Logs', 'LP', 'Dynamic_Input'):
    subdir = os.path.join(output_path, d)
    os.makedirs(subdir, exist_ok=True)

In [None]:
# Read the parameters template inside a context manager so the file handle is closed
# (the previous open(...).read() left it open), then render it with the custom params.
template_path = os.path.join(program_dir, 'GER_LP_Code/CPMO_parameters_TEMPLATE.py')
with open(template_path) as template_file:
    template = jj2.Template(template_file.read())
params = template.render(**custom_params)

# create parameters file with custom params set
# (note: this overwrites the CPMO_parameters.py file)
with open(os.path.join(program_dir, 'GER_LP_Code/CPMO_parameters.py'), 'w') as pfile:
    pfile.write(params)
# dump custom params to file for use with scripts
with open(custom_params_json_path, 'w') as f:
    json.dump(custom_params, f)

In [None]:
def update_audit_trail(
    params_file_in: str, 
    git_branch: str, 
    git_hash: str, 
    algo_version: str,
    version_type: str,
    version_iteration: str, 
    ipynb: bool
    ):
    """Create an AuditTrail for this code version and record the given parameters file.

    Args:
        params_file_in: Path of the parameters file forwarded to the audit trail update.
        git_branch: Git branch name passed to AuditTrail.
        git_hash: Git commit hash passed to AuditTrail.
        algo_version: Algorithm version label passed to AuditTrail.
        version_type: Version type label passed to AuditTrail.
        version_iteration: Version iteration label passed to AuditTrail.
        ipynb: Forwarded unchanged as the `ipynb` argument of the update call.
    """
    # Imported lazily so defining this function does not require the project package.
    from GER_LP_Code.audit_trail_utils import AuditTrail as _AuditTrail

    version_info = {
        'git_branch': git_branch,
        'git_hash': git_hash,
        'algo_version': algo_version,
        'version_type': version_type,
        'version_iteration': version_iteration,
    }
    trail = _AuditTrail(**version_info)
    trail.update_audit_trail(params_file_in, odp_lp = '', ipynb=ipynb)

In [None]:
params_file_in = os.path.join(custom_params['PROGRAM_OUTPUT_PATH'], 'CPMO_parameters.py')

# check for the duplicates AT_RUN_ID 
from google.cloud import bigquery
import pandas as pd
import GER_LP_Code.CPMO_parameters as p

bq_client = bigquery.Client()
query = f"""SELECT * FROM pbm-mac-lp-prod-ai.pricing_management.AT_Run_ID
        where RUN_ID IN ('{AT_RUN_ID}')"""
query_job = bq_client.query(query)
records = [dict(row) for row in query_job]

if len(records) == 0 and p.AT_RUN_ID == AT_RUN_ID:
        !gsutil cp {os.path.join(repo_head, 'GER_LP_Code', 'CPMO_parameters.py')} {params_file_in}
        update_audit_trail(
                params_file_in = params_file_in, 
                git_branch = git_branch, 
                git_hash = git_hash, 
                algo_version = algo_version,
                version_type = version_type,
                version_iteration = version_iteration,
                ipynb = True
        )
else: 
    print("Duplicate run-id is present, therefore cannot load it into the table")


### Run Preprocessing

In [None]:
# Run GER_LP_Code/Pre_Processing.py as a separate Python process via the IPython shell escape.
!python {os.path.join(program_dir, 'GER_LP_Code', 'Pre_Processing.py')}

### QA Checks

In [None]:
# Run GER_LP_Code/qa_checks.py as a separate Python process via the IPython shell escape.
!python {os.path.join(program_dir, 'GER_LP_Code', 'qa_checks.py')}

### Run Daily Input Read

In [None]:
# Run GER_LP_Code/Daily_Input_Read.py as a separate Python process via the IPython shell escape.
!python {os.path.join(program_dir, 'GER_LP_Code', 'Daily_Input_Read.py')}

### Run Optimization Script

In [None]:
# Run the optimization script in a separate process, passing the dumped custom params JSON,
# the parameters template path and the log level on the command line.
!python {os.path.join(program_dir, 'GER_LP_Code', 'ClientPharmacyMacOptimization.py')} \
    --custom-args-json {custom_params_json_path} \
    --template {program_dir}/GER_LP_Code/CPMO_parameters_TEMPLATE.py \
    --loglevel 'INFO'

### Run QA Script

In [None]:
# Run the QA script in a separate process, passing the dumped custom params JSON and the
# parameters template path on the command line.
!python {os.path.join(program_dir, 'GER_LP_Code', 'QA.py')} \
    --custom-args-json {custom_params_json_path}\
    --template {program_dir}/GER_LP_Code/CPMO_parameters_TEMPLATE.py

### Run Reporting

In [None]:
# Run the reporting script in a separate process, passing the dumped custom params JSON and
# the parameters template path on the command line.
!python {os.path.join(program_dir, 'GER_LP_Code', 'CPMO_reporting_to_IA.py')}\
    --custom-args-json {custom_params_json_path}\
    --template {program_dir}/GER_LP_Code/CPMO_parameters_TEMPLATE.py

## Simulation/Optimization Framework:

In the following cells, depending on user-specified parameters, an optimization or a simulation run will be triggered. NOTE that an optimization run will be the same as the first iteration of a multi-price (multi-time) simulation. The following client types/run types are currently supported in the simulation mode as well:

- GOODRX_OPT: True/False,
- UNC_OPT: True/False,
- UNC_ADJUST: True/False,
- YTD_OVERRIDE: True/False,
- YTD_LAG_OVERRIDE: False,
- BRAND_SURPLUS_READ_CSV: True/False,
- GENERIC_LAUNCH: True/False,
- READ_FROM_BQ: True/False,
- WRITE_TO_BQ: True/False

Note that, unlike the GCP version, the simulation framework is independent of the cells above; it can be run on its own, starting from the cells below.

In [None]:
#setup
import os
import importlib
import jinja2 as jj2
import datetime as dt
import random
import numpy as np
import socket
from pytz import timezone
from typing import NamedTuple, List
import json
import GER_LP_Code.sim_utils as su

#### Simulation/Optimization parameter setup:
- For an optimization run, make sure to have exactly one date in the `GO_LIVE_LIST`, and that the lengths of both `GPI_UP_LIST` and `GPI_LOW_LIST` are equal to 1 if `TIERED_PRICE_LIM: False`.

- For a simulation run, note that the length of the `GO_LIVE_LIST` should be more than 1 and equal to the lengths of `GPI_UP_LIST` and `GPI_LOW_LIST` if `TIERED_PRICE_LIM: False`.

In [None]:
#file Path Definitions
repo_head =  '/home/jupyter/clientpharmacymacoptimization'
# NOTE(review): relative path (no leading slash); after the os.chdir(repo_head) below it
# resolves underneath the repository directory. Confirm whether an absolute path was intended.
output_path = 'home/jupyter/Output'
# 'CHANGE' is a placeholder segment of the bucket path.
input_path = "gs://pbm-mac-lp-prod-ai-bucket/CHANGE"
# JSON dump of custom_params; rewritten on every iteration of the simulation loop.
custom_params_json_path = os.path.join(output_path, 'custom_params.json')

#set/reset working directory to the clientpharmacymacoptimization repo location
os.chdir(repo_head)
program_dir = os.path.abspath(os.curdir)

#simulation/Optimization parameter setup
sim_opt_params = {
    # One loop iteration is run per go-live date ('%m/%d/%Y'); dates must be in increasing order.
    "GO_LIVE_LIST":['8/13/2021', '10/13/2021'],
    # The two lists below are only read when TIERED_PRICE_LIM is False; each must then have
    # one entry per go-live date.
    #"GPI_UP_LIST":["0.3", "0.3"],
    #"GPI_LOW_LIST":["0.3", "0.3"],
    "TIERED_PRICE_LIM": True,
    "PROGRAM_INPUT_PATH": input_path,
    "PROGRAM_OUTPUT_PATH": output_path
    #"CREDENTIALS_PATH": credential_path
}

QA checking the simulation/optimization setup

In [None]:
# Sanity checks on sim_opt_params before anything is run.
# With non-tiered price limits, each GPI factor list needs one entry per go-live date.
if not sim_opt_params['TIERED_PRICE_LIM']:
    for factor_key in ('GPI_UP_LIST', 'GPI_LOW_LIST'):
        assert len(sim_opt_params['GO_LIVE_LIST']) == len(sim_opt_params[factor_key])

# Go-live dates must already be listed in increasing order.
if sim_opt_params['GO_LIVE_LIST']:
    go_live_dates = []
    for date in sim_opt_params['GO_LIVE_LIST']:
        go_live_dates.append(dt.datetime.strptime(date, '%m/%d/%Y'))
    assert go_live_dates == sorted(go_live_dates)

Setting up custom parameters for a simulation/optimization run. Note that the `DATA_ID` can only include a timestamp component if the timestamp component does **not** include time-of-day information (e.g. a date-only `%Y%m%d` stamp). Otherwise, the dynamic input files of the first iteration of the simulation cannot be used for its subsequent iterations. Also, note that for local machine runs of the simulation code, do not use `socket.gethostname()` to determine the `USER` parameter.

In [None]:
#General parameter setup
# Values that are strings of Python source (TIMESTAMP, DATA_ID, LAST_DATA, ...) are evaluated
# with eval() in the simulation loop and/or passed to the parameters template unchanged.
custom_params = {
    # Date-only stamp, so the value is the same for every iteration run on the same day.
    "TIMESTAMP": "'{0}'.format(dt.datetime.now().strftime('%Y%m%d'))",
    "USER": "'c255085'",
    "FULL_YEAR": False,
    "CUSTOMER_ID": "['3775']",
    'CLIENT_NAME_TABLEAU':'SIMULATION_DEMO_State of Missisippi',
    # A stray "" in front of this value left an unterminated string literal (SyntaxError).
    "DATA_ID": "'CHANGE_{}_{}'.format(CUSTOMER_ID[0], TIMESTAMP)",
    "BQ_INPUT_PROJECT_ID": "anbc-prod",
    "BQ_OUTPUT_DATASET": "ds_development_lp",
    "BQ_INPUT_DATASET_DS_PRO_LP": "fdl_gdp_ae_ds_pro_lp_share_ent_prod",
    "PROGRAM_INPUT_PATH": sim_opt_params['PROGRAM_INPUT_PATH'],
    # Overwritten per iteration by the simulation loop with a GO_LIVE_<date> sub-folder.
    "PROGRAM_OUTPUT_PATH": sim_opt_params['PROGRAM_OUTPUT_PATH'],
    "READ_FROM_BQ": True,
    "WRITE_TO_BQ": False,
    "UNC_OPT": False,
    "DROP_TABLES": False,
    "CLIENT_TYPE": "COMMERCIAL",
    # Starting LAST_DATA; the loop moves it to (previous GO_LIVE - 1 day) after each iteration.
    "LAST_DATA": "dt.datetime.strptime('06/30/2021', '%m/%d/%Y')",
    "RAW_GOODRX": "'GoodRx price Jan file 04192021.xlsx'",
    "FLOOR_GPI_LIST": "'20201209_Floor_GPIs.csv'",
    "GOODRX_OPT": False,
    "FLOOR_PRICE": True,
    "UNC_ADJUST": True,
    "DATA_START_DAY": '2021-01-01',
    "TIERED_PRICE_LIM": sim_opt_params['TIERED_PRICE_LIM'],
    "UPLOAD_SIM_TO_DASH": False,
}

The function below reads and processes the data files that are necessary to create the dynamic input for the next iteration, such that the model performance of the current iteration matches the pre-existing performance of the following iteration.

In [None]:
def seq_sim_data_transfer(input_path, 
                          new_output_path, 
                          pre_ytd_date, 
                          pre_golive_date, 
                          new_ytd_date, 
                          new_golive_date, 
                          custom_params, 
                          iteration) -> NamedTuple('Outputs', [('FLAG', bool)]):
    """Turn the outputs of one simulation iteration into dynamic inputs for the next.

    Reads the LP output, LP input, MAC list and brand/generic surplus files of the
    iteration stored under `input_path`, passes them through the `sim_utils`
    transfer functions, and writes the transformed files to `new_output_path`.
    File names and run options come from the `CPMO_parameters` module, which is
    reloaded on every call because the notebook rewrites it before each iteration.

    Args:
        input_path: Output folder of the iteration that just ran; its 'Output' and
            'Dynamic_Input' sub-folders are read.
        new_output_path: Folder the transformed CSV files are written to.
        pre_ytd_date: LAST_DATA date of the iteration that just ran.
        pre_golive_date: GO_LIVE date of the iteration that just ran; its month is
            also part of the spend data file name.
        new_ytd_date: LAST_DATA date of the next iteration.
        new_golive_date: GO_LIVE date of the next iteration.
        custom_params: Accepted for interface compatibility; not used in this function.
        iteration: Iteration index, forwarded to the UNC LP data transfer.

    Returns:
        True once all transformed files have been written.
    """
    #setup
    import os
    import sys
    import importlib
    sys.path.append(repo_head)

    import pandas as pd
    import numpy as np
    import datetime as dt

    import GER_LP_Code.util_funcs as uf
    #uf.write_params(os.path.join(custom_params['PROGRAM_OUTPUT_PATH'], 'CPMO_parameters.py'))
    import GER_LP_Code.CPMO_parameters as p
    # CPMO_parameters.py is regenerated before every iteration, but a plain import returns
    # the module cached by the first import in this kernel. Reload it so p reflects the
    # file that was just written.
    p = importlib.reload(p)
    import GER_LP_Code.BQ as BQ
    import GER_LP_Code.sim_utils as su

    #######################################################################################################################################################
    #reading data of the current iteration
    print('loading previous iteration data...')

    #reading total output#####################################################################################
    lp_output_df = pd.read_csv(os.path.join(input_path + '/Output/', 'Total_Output_' + p.DATA_ID + '.csv'),
                               dtype = p.VARIABLE_TYPE_DIC)
    #NOTE that, currently, Total_Output_ is written to cloud storage whether or not WRITE_TO_BQ is true

    #reading lp data##########################################################################################
    if p.UNC_OPT:
        lp_unc_input_df = pd.read_csv(os.path.join(input_path + '/Dynamic_Input/', 'lp_data_' + p.DATA_ID + '.csv'),
                                      dtype = p.VARIABLE_TYPE_DIC)
        lp_nounc_input_df = pd.read_csv(os.path.join(input_path + '/Dynamic_Input/', 'lp_data_nounc_' + p.DATA_ID + '.csv'),
                                        dtype = p.VARIABLE_TYPE_DIC)
        # NOTE(review): same 'lp_data_nounc_' file name as above, but read from /Output/.
        # Confirm this is the intended source for lp_vol_mv_agg_df_nounc.
        lp_vol_mv_agg_df_nounc = pd.read_csv(os.path.join(input_path + '/Output/', 'lp_data_nounc_' + p.DATA_ID + '.csv'),
                                             dtype = p.VARIABLE_TYPE_DIC)
    else:
        lp_input_df = pd.read_csv(os.path.join(input_path + '/Dynamic_Input/', 'lp_data_' + p.DATA_ID + '.csv'),
                                  dtype = p.VARIABLE_TYPE_DIC)
    #NOTE that, currently, lp_data_ is read from cloud storage only

    #reading spend data#######################################################################################
    # spend_data_df is loaded but not referenced again in this function. The read is kept so
    # that a missing spend data source still fails here, as before.
    if p.WRITE_TO_BQ:
        spend_data_df = uf.read_BQ_data(BQ.full_spend_data,
                                        project_id = p.BQ_OUTPUT_PROJECT_ID,
                                        dataset_id = p.BQ_OUTPUT_DATASET,
                                        table_id = 'Spend_data',
                                        run_id = p.AT_RUN_ID,
                                        client = uf.get_formatted_string(p.CUSTOMER_ID),
                                        period = p.TIMESTAMP,
                                        output = True)
    else:
        spend_data_df = pd.read_csv(os.path.join(input_path + '/Output/', 'Spend_data_' + str(p.TIMESTAMP) + str(pre_golive_date.month) + '.csv'),
                                    dtype = p.VARIABLE_TYPE_DIC)
    #reading mac list data ###################################################################################
    mac_list_df = pd.read_csv(os.path.join(input_path + '/Dynamic_Input/', 'mac_lists_' + p.DATA_ID + '.csv'),
                              dtype = p.VARIABLE_TYPE_DIC)
    #NOTE that, currently, mac_lists_ is read from cloud storage only

    #reading brand/generic offset data #######################################################################
    brand_gen_df = pd.read_csv(os.path.join(input_path + '/Dynamic_Input/', 'brand_surplus_' + p.DATA_ID + '.csv'))
    #NOTE that, currently brand_surplus_ data is only read from cloud storage

    #reading performance override data #######################################################################
    if p.YTD_OVERRIDE:
        perf_over_df = pd.read_csv(p.FILE_INPUT_PATH + p.LAG_YTD_Override_File, dtype = p.VARIABLE_TYPE_DIC)
        lag_perf_df = pd.read_csv(os.path.join(p.FILE_DYNAMIC_INPUT_PATH,  'lag_surplus_{}.csv'.format(p.DATA_ID)), dtype = p.VARIABLE_TYPE_DIC)
    print('finished loading the data.')

    #######################################################################################################################################################
    #transforming data for next iteration

    #transforming mac lists data
    print('transforming mac lists prices to lp output final prices...')
    mac_list_df_new = su.seq_sim_mac_list_transfer(mac_list_df, lp_output_df)

    #transforming brand/generic surplus data
    brand_generic_new = su.seq_sim_brand_generic_transfer(brand_gen_df, pre_ytd_date, pre_golive_date, new_ytd_date, new_golive_date)

    #transforming lp data
    if p.UNC_OPT:
        lp_nounc_input_df_new = su.seq_sim_unc_lp_data_transfer(lp_nounc_input_df, lp_output_df,
                                                             pre_ytd_date, pre_golive_date, new_ytd_date, new_golive_date,
                                                             iteration = iteration, unc_flag = False,
                                                             lp_vol_mv_agg_df_nounc = lp_vol_mv_agg_df_nounc,
                                                             lp_input_df_unc = lp_unc_input_df)

        lp_unc_input_df_new = su.seq_sim_unc_lp_data_transfer(lp_unc_input_df, lp_output_df,
                                                           pre_ytd_date, pre_golive_date, new_ytd_date, new_golive_date,
                                                           iteration = iteration, unc_flag = True)
    else:
        lp_input_df_new = su.seq_sim_lp_data_transfer(lp_input_df, lp_output_df,
                                                   pre_ytd_date, pre_golive_date, new_ytd_date, new_golive_date)

    #transforming performance override data
    if p.YTD_OVERRIDE:
        print('transforming performance override data...')
        perf_over_df_new = su.seq_sim_perf_override_transfer(perf_over_df, lag_perf_df)

    #######################################################################################################################################################
    ####performance qa checks
    print('finished transforming data.')
    #print('QA checks for transformed data...')
    #
    #print('All QA checks have passed.')

    #######################################################################################################################################################
    ####write the transformed data
    print('writing transformed data ...')
    mac_list_df_new.to_csv(os.path.join(new_output_path, 'mac_lists_' + p.DATA_ID + '.csv'), index = False)
    brand_generic_new.to_csv(os.path.join(new_output_path, 'brand_surplus_' + p.DATA_ID + '.csv'),index = False)
    if p.UNC_OPT:
        lp_nounc_input_df_new.to_csv(os.path.join(new_output_path, 'lp_data_nounc_' + p.DATA_ID + '.csv'), index = False)
        lp_unc_input_df_new.to_csv(os.path.join(new_output_path, 'lp_data_' + p.DATA_ID + '.csv'), index = False)
    else:
        lp_input_df_new.to_csv(os.path.join(new_output_path, 'lp_data_' + p.DATA_ID + '.csv'), index = False)

    if p.YTD_OVERRIDE:
        # Overwrites the override file in place at its original input location.
        perf_over_df_new.to_csv(p.FILE_INPUT_PATH + p.LAG_YTD_Override_File, index = False)

    return True

#### Simulation/Optimization pipeline run:

The cell below executes the main simulation loop. The first iteration runs in a similar fashion to a regular optimization run. After that, the `LAST_DATA` and `GO_LIVE` dates are shifted forward, and the outputs are used to create dynamic input files such that the model performance of the current iteration matches the pre-existing performance of the next iteration. This is done by calling the `seq_sim_data_transfer` function defined above, which relies on the `sim_utils.py` module; that module houses the data transfer and reporting functions for the simulation model.

Note that an optimization run is identified by having a `sim_opt_params["GO_LIVE_LIST"]` of length 1 whereas in the simulation mode, the length of `sim_opt_params["GO_LIVE_LIST"]` can be more than 1. 

Also, note that for local Windows runs, the `subprocess` command for copying data is different, and the correct lines should be uncommented.

In [None]:
# Main simulation/optimization loop: one iteration per entry of sim_opt_params["GO_LIVE_LIST"].
# Each iteration re-renders CPMO_parameters.py, runs the pipeline scripts as subprocesses and,
# unless it is the last iteration, prepares the dynamic inputs of the next one.
import time

num_iterations = len(sim_opt_params["GO_LIVE_LIST"])
# NOTE(review): failed_at is never updated inside the loop, so the reporting cell always sees
# failed_at == num_iterations. The exit status of the `!python` steps is not checked either.
failed_at = num_iterations
# Keep the starting LAST_DATA expression; the loop overwrites custom_params["LAST_DATA"].
initial_last_data = custom_params["LAST_DATA"]
start_time = time.time()

for i in range(num_iterations):

    print('######################################################################')
    if num_iterations == 1: #error: use custom_params['GO_LIVE'] before assignment
        print('Optimization run: \n\tLAST DATA date:', 
              eval(custom_params['LAST_DATA']), 
              '\n\tGO LIVE date:', 
              sim_opt_params['GO_LIVE_LIST'][0])
    else:
        print('Simulation iteration:', i, '\n\tLAST DATA date:', 
              eval(custom_params['LAST_DATA']), 
              '\n\tGO LIVE date:', 
              sim_opt_params['GO_LIVE_LIST'][i])

    #alter <custom_params> before pipeline run
    # Every iteration writes to its own GO_LIVE_<date> sub-folder of output_path.
    custom_params['PROGRAM_OUTPUT_PATH'] = output_path + "/GO_LIVE_{0}".format(sim_opt_params["GO_LIVE_LIST"][i].replace('/', '-'))
    custom_params['GO_LIVE'] = "dt.datetime.strptime('{0}', '%m/%d/%Y')".format(sim_opt_params["GO_LIVE_LIST"][i])
    custom_params['RUN_TYPE_TABLEAU'] = 'SIMULATION_{0}'.format(sim_opt_params["GO_LIVE_LIST"][i].replace('/', '-'))
    custom_params['SKIP_TO_OPT'] = True if i > 0 else False
    # Run ID is the iteration index followed by the evaluated TIMESTAMP expression.
    custom_params['AT_RUN_ID'] = "{1}{0}".format(eval(custom_params['TIMESTAMP']),i)
    if not sim_opt_params['TIERED_PRICE_LIM']:
        if ("GPI_UP_LIST" in sim_opt_params):
            custom_params['GPI_UP_FAC'] = sim_opt_params['GPI_UP_LIST'][i]
        if ("GPI_LOW_LIST" in sim_opt_params):
            custom_params['GPI_LOW_FAC'] = sim_opt_params['GPI_LOW_LIST'][i]

    #directory support
    os.makedirs(custom_params['PROGRAM_OUTPUT_PATH'], exist_ok = True)
    for d in ['Output', 'Logs', 'LP', 'Dynamic_Input']:
        os.makedirs(os.path.join(custom_params['PROGRAM_OUTPUT_PATH'], d), exist_ok=True)

    #create CPMO_parameters.py based on <custom_params>
    # NOTE(review): the handle returned by open() is never closed explicitly.
    template = jj2.Template(open(os.path.join(program_dir, 'GER_LP_Code/CPMO_parameters_TEMPLATE.py')).read())
    params = template.render(**custom_params)
    #create parameters file with custom params set
    #(note: this overwrites the CPMO_parameters.py file)
    with open(os.path.join(program_dir, 'GER_LP_Code/CPMO_parameters.py'), 'w') as pfile:
        pfile.write(params)
    # dump custom params to file for use with scripts
    with open(custom_params_json_path, 'w') as f:
        json.dump(custom_params, f)


    if i==0: #only the first iteration will run the following components of a regular optimization run
        print('run pre_processing script...')
        !python {os.path.join(program_dir, 'GER_LP_Code', 'Pre_Processing.py')}
        print('run input qa_checks script...')
        !python {os.path.join(program_dir, 'GER_LP_Code', 'qa_checks.py')}
        print('run daily_input_read script...')
        !python {os.path.join(program_dir, 'GER_LP_Code', 'Daily_Input_Read.py')}

    #create lp, solve, and produce output
    print('run input cpmo script...')
    !python {os.path.join(program_dir, 'GER_LP_Code', 'ClientPharmacyMacOptimization.py')} \
        --custom-args-json {custom_params_json_path} \
        --template {program_dir}/GER_LP_Code/CPMO_parameters_TEMPLATE.py \
        --loglevel 'INFO'

    print('run final qa script...')
    #qa check lp results
    !python {os.path.join(program_dir, 'GER_LP_Code', 'QA.py')}\
        --custom-args-json {custom_params_json_path}\
        --template {program_dir}/GER_LP_Code/CPMO_parameters_TEMPLATE.py

    print('run report to dashboard script...')
    #report results to dashboard
    !python {os.path.join(program_dir, 'GER_LP_Code', 'CPMO_reporting_to_IA.py')}\
        --custom-args-json {custom_params_json_path}\
        --template {program_dir}/GER_LP_Code/CPMO_parameters_TEMPLATE.py

    # NOTE(review): this prints the TIMESTAMP expression string, not its evaluated value.
    print(f'Iteration {i} submitted at: {custom_params["TIMESTAMP"]}')

    #this is to distinguish between simulation and optimization run
    #simulation iterations require data transformation after the first iteration
    if i+1 < num_iterations:
        print(f'Transferring output data of iteration {i} to dynamic input of iteration {i + 1}')
        data_transfer_start_time = time.time()

        #create folder for next iteration if necessary
        next_output_path = output_path + "/GO_LIVE_{0}".format(sim_opt_params["GO_LIVE_LIST"][i + 1].replace('/', '-'))

        ##for windows runs on local machines, uncomment the lines below to copy dynamic input folder
        #import subprocess
        #status = subprocess.call('copy {0} {1}'.format(custom_params['PROGRAM_OUTPUT_PATH'] + '/Dynamic_Input',
        #                                             next_output_path + '/Dynamic_Input'),
        #                         shell = True)

        ##for linux/unix runs on cloud, uncomment the lines below to copy dynamic input folder
        import subprocess
        status_make = subprocess.call('mkdir -p {0}'.format(next_output_path),shell = True)
        # NOTE(review): if next_output_path/Dynamic_Input already exists (e.g. on a re-run),
        # `cp -r` nests the source folder inside it instead of overwriting -- confirm intent.
        status = subprocess.call('cp -r {0} {1}'.format(custom_params['PROGRAM_OUTPUT_PATH'] + '/Dynamic_Input',
                                                     next_output_path + '/Dynamic_Input'),
                                 shell = True)

        # A failed copy is only reported; the loop carries on regardless.
        if status != 0:
            if status < 0:
                print("Copying dynamic input killed by signal", status)
            else:
                print("Copying dynamic input failed with return code - ", status)
        else:
            print('Dynamic input folder copied.')

        #shift LAST_DATA and GO_LIVE dates forward
        # These are strings of Python source; they are eval()'d when passed on below.
        prev_last_data = custom_params["LAST_DATA"]
        prev_go_live = custom_params["GO_LIVE"]
        new_last_data = custom_params["GO_LIVE"] + "+ dt.timedelta(days = -1)"
        new_go_live = "dt.datetime.strptime('{0}', '%m/%d/%Y')".format(sim_opt_params["GO_LIVE_LIST"][i + 1])

        #alter dynamic input files for next iteration
        results = seq_sim_data_transfer(custom_params['PROGRAM_OUTPUT_PATH'],
                                        next_output_path + '/Dynamic_Input', 
                                        eval(prev_last_data),
                                        eval(prev_go_live), 
                                        eval(new_last_data), 
                                        eval(new_go_live),
                                        custom_params,
                                        i)

        #the new LAST_DATA date will be the day before the previous GO_LIVE date
        #NOTE that the new GO_LIVE date will be set at the start of the for loop
        custom_params["LAST_DATA"] = new_last_data
        #custom_params['AT_RUN_ID'] = audit_obj.get_latest_run_id(table_name = 'AT_Run_ID') #subject to clean up
        #sim_opt_params['AT_RUN_ID_LIST'].append(custom_params['AT_RUN_ID'])     

        print('Data converted for next iteration in {0} seconds'.format(time.time() - data_transfer_start_time))
        print('######################################################################')

print('Simulation of {0} iterations was completed in {1} seconds'.format(num_iterations, time.time() - start_time))

In [None]:
# Post-run reporting. Nothing is produced for a single-iteration (optimization) run.
if failed_at == 0:
    print('Simulation failed in first iteration. Adjust parameters and try again.')
elif num_iterations > 1 and failed_at > 0:
    # Simulation-specific results report.
    su.create_sim_report(
        output_path,
        custom_params,
        sim_opt_params,
        'Simulation_Run_Report',
        failed_at,
        initial_last_data,
    )
    # Simulation-specific QA report.
    su.create_qa_report(
        output_path,
        custom_params,
        sim_opt_params,
        'Simulation_QA_Report',
        failed_at,
    )
    print('Simulation specific reports for results and QA is generated.')