# Hospital Patient Claims
## Staging to Curated 

####  Run this cell to set up and start your interactive session.


In [None]:
%idle_timeout 10
%glue_version 5.0
%worker_type G.1X
%number_of_workers 2

import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
import awsglue.transforms as  T
import pyspark.sql.functions as  F
from pyspark.sql.types import * 
from awsglue import DynamicFrame
import json 
from datetime import date
import boto3
from botocore.exceptions import ClientError
  
# Bootstrap the Spark / Glue runtime objects that the later cells rely on.
sc = SparkContext.getOrCreate()
glueContext = GlueContext(sc)
spark = glueContext.spark_session
job = Job(glueContext)

# Job parameters that must be supplied on the command line.
expected_job_args = ['JOB_NAME', 'is_init_load']
args = getResolvedOptions(sys.argv, expected_job_args)

In [None]:
import boto3, json
def get_secret(
    secret_name: str = "dev/hospital_patients_claims/redshift_connection",
    region_name: str = "us-east-1",
) -> str:
    """Fetch a secret's string payload from AWS Secrets Manager.

    Args:
        secret_name: Identifier of the secret to read. Defaults to the
            Redshift connection secret used by this notebook.
        region_name: AWS region in which the Secrets Manager client is created.

    Returns:
        The raw ``SecretString`` value of the secret (not parsed).

    Raises:
        botocore.exceptions.ClientError: Propagated unchanged when the
            ``get_secret_value`` call fails.
        KeyError: If the response has no ``SecretString`` entry.
    """
    client = boto3.client(
        service_name='secretsmanager',
        region_name=region_name,
    )
    # No try/except here: the previous handler only re-raised the same
    # exception, so letting it propagate directly is equivalent.
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    return get_secret_value_response['SecretString']

# Decode the secret payload (JSON text) into a dict of connection settings.
db_config = json.loads(get_secret())

# Connection option name -> key inside the secret that supplies its value.
_secret_key_by_option = {
    "url": "dev_url",
    "user": "dev_username",
    "password": "dev_password",
    "redshiftTmpDir": "dev_redshift_temp_directory",
}

# Base Redshift connection options shared by the reads/writes below.
my_conn_options = {
    option_name: db_config[secret_key]
    for option_name, secret_key in _secret_key_by_option.items()
}

In [None]:
# Job arguments resolved by getResolvedOptions are strings, so a plain
# truthiness test would treat "false" (or any non-empty text) as True.
# Parse the flag explicitly into a real boolean instead.
initial_load = str(args['is_init_load']).strip().lower() in ('true', '1', 'yes')

In [None]:
# Current date rendered as YYYY-MM-DD text.
todays_date = date.today().isoformat()

In [None]:
# incremental load table
# Build the read options as a separate dict. Assigning my_conn_options
# directly would only alias it, so 'sampleQuery' and 'dbtable' would leak into
# the shared options that later cells reuse for their writes.
df_incremental_load_conn_options = {
    **my_conn_options,
    'dbtable': "staging_claims_incremental_load",
    # Restrict the read to the rows whose load_date equals today's date.
    'sampleQuery': f"Select * from staging_claims_incremental_load where load_date = '{todays_date}' ",
}
df_incremental_load = glueContext.create_data_frame.from_options(
    connection_type='redshift',
    connection_options=df_incremental_load_conn_options,
)

In [None]:
# Split the incremental load into one column projection per target table.
# NOTE(review): the projections are not de-duplicated; if one patient / policy /
# address appears on several rows of the load, it appears several times in the
# corresponding staging frame - confirm whether that is intended.

# claim table
claim_columns = [
    'claim_id',
    'high_risk_claim_flag',
    'claim_initialized_date',
    'claim_request_amount',
    'claim_status',
    'claim_rejected_reason',
]
df_staging_claims = df_incremental_load.select(*claim_columns)

# patient table
patient_columns = [
    'patient_id',
    'name_prefix',
    'first_name',
    'last_name',
    'patient_full_name',
    'date_of_birth',
    'phone_number',
    'email_id',
]
df_staging_patient = df_incremental_load.select(*patient_columns)

# policy table
# NOTE(review): 'preimum_amount' is spelled this way in the source column
# list; presumably it matches the staging schema - verify before renaming.
policy_columns = [
    'policy_id',
    'policy_start_date',
    'policy_end_date',
    'preimum_amount',
    'coverage_limit',
]
df_staging_policy = df_incremental_load.select(*policy_columns)

# address table
address_columns = [
    'address_id',
    'addressline',
    'borough',
    'borough_level',
    'borough_latitude',
    'borough_longitude',
    'borough_abbrev',
    'borough_code',
    'city',
    'state',
]
df_staging_address = df_incremental_load.select(*address_columns)

In [None]:
# SQL run after the staging write: copy the staged claims into dim_claims.
claims_insert_query_post_action = """
INSERT INTO dim_claims(claim_id,high_risk_claim_flag,claim_initialized_date,claim_request_amount,claim_status,claim_rejected_reason)
SELECT claim_id,high_risk_claim_flag,claim_initialized_date,claim_request_amount,claim_status,claim_rejected_reason
FROM staging_claims;
"""

# loading Staging Claims
claims_staging_table = "staging_claims"
# The shared options dict is updated in place: target table, a TRUNCATE of
# that table before the write, and the INSERT above after it.
my_conn_options.update(
    dbtable=claims_staging_table,
    preactions=f"TRUNCATE {claims_staging_table}",
    postactions=claims_insert_query_post_action,
)
claims_dynamic_frame = DynamicFrame.fromDF(df_staging_claims, glueContext, 'redshift_write_claims')
glueContext.write_dynamic_frame.from_options(
    connection_type="redshift",
    connection_options=my_conn_options,
    frame=claims_dynamic_frame,
)

In [None]:
# SQL run after the staging write: mark the current dim_patient rows that
# match a staged patient_id as no longer current, then insert the staged rows.
patient_merge_query_post_action = """

UPDATE dim_patient 
SET is_current = 'N', effective_end_date = getdate()
FROM staging_patient
WHERE staging_patient.patient_id = dim_patient.patient_id
and dim_patient.is_current = 'Y'
;

INSERT INTO dim_patient(patient_id,name_prefix,first_name,last_name,patient_full_name,date_of_birth,phone_number,email_id)
SELECT patient_id,name_prefix,first_name,last_name,patient_full_name,date_of_birth,phone_number,email_id
FROM staging_patient;

"""

# loading Staging Patients
patient_staging_table = "staging_patient"
# The shared options dict is updated in place: target table, a TRUNCATE of
# that table before the write, and the merge SQL above after it.
my_conn_options.update(
    dbtable=patient_staging_table,
    preactions=f"TRUNCATE {patient_staging_table}",
    postactions=patient_merge_query_post_action,
)
patient_dynamic_frame = DynamicFrame.fromDF(df_staging_patient, glueContext, 'redshift_write_patient')
glueContext.write_dynamic_frame.from_options(
    connection_type="redshift",
    connection_options=my_conn_options,
    frame=patient_dynamic_frame,
)

In [None]:
# SQL run after the staging write: mark the current dim_policy rows that
# match a staged policy_id as no longer current, then insert the staged rows.
# The staged column 'preimum_amount' is mapped onto dim_policy.premium_amount.
policy_merge_query_post_action = """
UPDATE dim_policy 
SET is_current = 'N', effective_end_date = getdate()
FROM staging_policy
WHERE staging_policy.policy_id = dim_policy.policy_id
and dim_policy.is_current = 'Y'
;

INSERT INTO dim_policy(policy_id,policy_start_date,policy_end_date,premium_amount,coverage_limit)
SELECT policy_id,policy_start_date,policy_end_date,preimum_amount,coverage_limit
FROM staging_policy;

"""

# loading Staging Policy
policy_staging_table = "staging_policy"
# The shared options dict is updated in place: target table, a TRUNCATE of
# that table before the write, and the merge SQL above after it.
my_conn_options.update(
    dbtable=policy_staging_table,
    preactions=f"TRUNCATE {policy_staging_table}",
    postactions=policy_merge_query_post_action,
)
policy_dynamic_frame = DynamicFrame.fromDF(df_staging_policy, glueContext, 'redshift_write_policy')
glueContext.write_dynamic_frame.from_options(
    connection_type="redshift",
    connection_options=my_conn_options,
    frame=policy_dynamic_frame,
)

In [None]:
# SQL run after the staging write: mark the current dim_address rows that
# match a staged address_id as no longer current, then insert the staged rows.
address_merge_query_post_action = """

UPDATE dim_address 
SET is_current = 'N', effective_end_date = getdate()
FROM staging_address
WHERE staging_address.address_id = dim_address.address_id
and dim_address.is_current = 'Y'
;

INSERT INTO dim_address(address_id,addressline,borough,borough_level,borough_latitude,borough_longitude,borough_abbrev,borough_code,city,state)
SELECT address_id,addressline,borough,borough_level,borough_latitude,borough_longitude,borough_abbrev,borough_code,city,state
FROM staging_address;

"""

# loading Staging Address
address_staging_table = "staging_address"
# The shared options dict is updated in place: target table, a TRUNCATE of
# that table before the write, and the merge SQL above after it.
my_conn_options.update(
    dbtable=address_staging_table,
    preactions=f"TRUNCATE {address_staging_table}",
    postactions=address_merge_query_post_action,
)
address_dynamic_frame = DynamicFrame.fromDF(df_staging_address, glueContext, 'redshift_write_address')
glueContext.write_dynamic_frame.from_options(
    connection_type="redshift",
    connection_options=my_conn_options,
    frame=address_dynamic_frame,
)

In [None]:
# loading Curated Dim Date
# Only populated on an initial load. The flag may still be the raw string job
# argument, so it is normalised here rather than tested for truthiness
# (the string "false" would otherwise count as True).
if str(initial_load).strip().lower() in ('true', '1', 'yes'):
    # One row per month start from 2000-01-01 through 2030-01-01.
    dim_date = spark.sql('''
    SELECT explode(sequence(to_date('2000-01-01'), to_date('2030-01-01'), interval 1 months)) as date
    ''')
    # date_format / ceil are referenced through the `F` alias: the file only
    # imports pyspark.sql.functions as F, so the bare names are undefined.
    # NOTE(review): every generated date is the first of a month, so the
    # 'MMdd' date_id repeats for the same month of every year - confirm that
    # this is the intended key format.
    dim_date_cols = {
        "date_id": F.date_format(dim_date.date, 'MMdd'),
        "month": F.date_format(dim_date.date, 'M'),
        "month_short": F.date_format(dim_date.date, "LLL"),
        "month_long": F.date_format(dim_date.date, "LLLL"),
        "year_short": F.date_format(dim_date.date, 'yy'),
        "year_long": F.date_format(dim_date.date, 'yyyy'),
        "quarter": F.ceil(F.date_format(dim_date.date, 'M') / 3),
    }
    dim_date = dim_date.withColumns(dim_date_cols)
    dim_date = dim_date.drop('date')
    dfy_dim_date = DynamicFrame.fromDF(dim_date, glueContext, 'redshift_write_dim_date')

    # Use a clean copy of the shared options: my_conn_options may still carry
    # 'preactions' / 'postactions' / 'sampleQuery' set for other tables, and
    # those statements must not run as part of the dim_date write.
    dim_date_conn_options = {
        key: value
        for key, value in my_conn_options.items()
        if key not in ('preactions', 'postactions', 'sampleQuery')
    }
    dim_date_conn_options['dbtable'] = "dim_date"
    glueContext.write_dynamic_frame.from_options(
        connection_type="redshift",
        connection_options=dim_date_conn_options,
        frame=dfy_dim_date,
    )

In [None]:
# SQL run after the write below: insert one fact row per staged claim, joined
# to dim_claims and to the current (is_current = 'Y', no effective_end_date)
# patient, address and policy dimension rows.
# NOTE(review): the SELECT reads all of staging_claims_incremental_load with
# no load_date filter, unlike the read at the top of the notebook - confirm
# that the staging table only ever holds the current increment.
fact_table_insert_query_post_action ="""

INSERT INTO fact_claims_hist(patient_ref_key,claim_ref_key,address_ref_key,policy_ref_key,last_updated_date)
(
SELECT 
dim_patient.patient_ref_key,
dim_claims.claim_ref_key,
dim_address.address_ref_key,
dim_policy.policy_ref_key,
getdate() last_updated_date
FROM staging_claims_incremental_load
join dim_claims on staging_claims_incremental_load.claim_id = dim_claims.claim_id
join dim_patient on staging_claims_incremental_load.patient_id = dim_patient.patient_id and dim_patient.is_current = 'Y' and dim_patient.effective_end_date is NULL 
join dim_address on staging_claims_incremental_load.address_id = dim_address.address_id and dim_address.is_current = 'Y' and dim_address.effective_end_date is NULL 
join dim_policy on staging_claims_incremental_load.policy_id = dim_policy.policy_id and dim_policy.is_current = 'Y' and dim_policy.effective_end_date is NULL 
);

"""
# loading dim claims Incremental Load
# Build dedicated options instead of mutating my_conn_options: the shared dict
# may still hold a 'preactions' TRUNCATE (and a 'sampleQuery') that belong to
# other tables and must not run as part of this write.
fact_load_conn_options = {
    key: value
    for key, value in my_conn_options.items()
    if key not in ('preactions', 'sampleQuery')
}
fact_load_conn_options['dbtable'] = "dim_claims_incremental_load"
fact_load_conn_options['postactions'] = fact_table_insert_query_post_action
glueContext.write_dynamic_frame.from_options(
    connection_type="redshift",
    connection_options=fact_load_conn_options,
    frame=DynamicFrame.fromDF(df_incremental_load, glueContext, 'redshift_write_claims_incremental_load'),
)