In [None]:
%flink.ssql
-- Source table: raw hospital patient / policy / claim events read from the
-- 'kinesis-event-stream-raw' Kinesis stream and decoded as JSON.
-- Every field is declared as STRING; no casting happens at this stage.
-- NOTE(review): `preimum_amount` and `addres_id` look misspelled but are kept
-- verbatim because they presumably mirror the producer's JSON keys -- confirm.
DROP TABLE IF EXISTS source_hospital_patients_claims_stream_table;

CREATE TABLE source_hospital_patients_claims_stream_table (
  -- record routing
  `partition_key`           STRING,

  -- patient
  `patient_id`              STRING,
  `name_prefix`             STRING,
  `first_name`              STRING,
  `last_name`               STRING,
  `date_of_birth`           STRING,
  `phone_number`            STRING,
  `email_id`                STRING,

  -- policy
  `policy_id`               STRING,
  `policy_start_date`       STRING,
  `policy_end_date`         STRING,
  `preimum_amount`          STRING,
  `coverage_limit`          STRING,

  -- address
  `addres_id`               STRING,
  `addressline`             STRING,
  `city`                    STRING,
  `state`                   STRING,
  `borough`                 STRING,

  -- claim
  `claim_id`                STRING,
  `claim_initialized_date`  STRING,
  `claim_request_amount`    STRING,
  `claim_status`            STRING,
  `claim_rejected_reason`   STRING
) WITH (
  'connector'           = 'kinesis',
  'stream'              = 'kinesis-event-stream-raw',
  'aws.region'          = 'us-east-1',
  'format'              = 'json',
  -- start reading from the tip of the stream
  'scan.stream.initpos' = 'LATEST'
);


In [None]:
%flink.ssql
-- Processed Kinesis table (sink): validated claim records enriched with the
-- London borough lookup attributes, written as JSON to
-- 'kinesis-event-stream-processed'.
--
-- Flink matches INSERT columns to the sink by position, so this column list
-- must have the same order and count as the SELECT that feeds it. The
-- enrichment columns borough_level / borough_latitude / borough_longitude sit
-- directly after `borough` to match that SELECT.
DROP TABLE IF EXISTS processed_hospital_patients_claims_stream_table;
CREATE TABLE `processed_hospital_patients_claims_stream_table` (
    `partition_key` STRING,
    `patient_id` STRING,
    `name_prefix` STRING,
    `first_name` STRING,
    `last_name` STRING,
    `date_of_birth` STRING,
    `phone_number` STRING,
    `email_id` STRING,
    `policy_id` STRING,
    `policy_start_date` STRING,
    `policy_end_date` STRING,
    `preimum_amount` STRING,
    `coverage_limit` STRING,
    `addres_id` STRING,
    `addressline` STRING,
    `city` STRING,
    `state` STRING,
    `borough` STRING,
    -- enrichment columns taken from the borough lookup table
    `borough_level` STRING,
    `borough_latitude` STRING,
    `borough_longitude` STRING,
    `claim_id` STRING,
    `claim_initialized_date` STRING,
    `claim_request_amount` STRING,
    `claim_status` STRING,
    `claim_rejected_reason` STRING
)
WITH (
  'connector' = 'kinesis',
  'stream' = 'kinesis-event-stream-processed',
  'aws.region' = 'us-east-1',
  'format' = 'json'
);

In [None]:
%flink.ssql
-- Lookup table: UK London borough details read from a CSV object in S3.
-- Joined to the claims stream on `borough` to add level / latitude / longitude.
-- The table is dropped first so the paragraph can be re-run without a
-- "table already exists" error, matching the other DDL paragraphs.
-- NOTE(review): columns are mapped by position from the CSV; confirm the file
-- has exactly these four fields in this order and no header row.
DROP TABLE IF EXISTS london_borough_locations_table;
CREATE TABLE london_borough_locations_table (
  `borough` STRING,
  `borough_level` STRING,
  `borough_latitude` STRING,
  `borough_longitude` STRING
) WITH (
  'connector' = 'filesystem',
  'path' = 's3://hospital-patients-claims-bucket/flink_sql_borough_locations.csv',
  'format' = 'csv'
);


In [None]:
%flink.ssql(type=insert)
-- Enrich valid claim records with borough details and write them to the
-- processed stream.
--
-- A record is valid only when ALL of its key identifiers are present and its
-- borough is not one of the placeholder values. This is the complement of the
-- rejected-records filter (any key identifier NULL), so a record with a
-- missing key is not also emitted here.
--
-- NOTE(review): columns are inserted by position; the sink table must declare
-- borough_level, borough_latitude and borough_longitude right after borough
-- (26 columns in total) for this statement to validate.
INSERT INTO processed_hospital_patients_claims_stream_table
SELECT
    src.partition_key,
    src.patient_id,
    src.name_prefix,
    src.first_name,
    src.last_name,
    src.date_of_birth,
    src.phone_number,
    src.email_id,
    src.policy_id,
    src.policy_start_date,
    src.policy_end_date,
    src.preimum_amount,
    src.coverage_limit,
    src.addres_id,
    src.addressline,
    src.city,
    src.`state`,
    loc.borough,
    loc.borough_level,
    loc.borough_latitude,
    loc.borough_longitude,
    src.claim_id,
    src.claim_initialized_date,
    src.claim_request_amount,
    src.claim_status,
    src.claim_rejected_reason
FROM source_hospital_patients_claims_stream_table AS src
JOIN london_borough_locations_table AS loc
    ON loc.borough = src.borough
WHERE src.patient_id IS NOT NULL
  AND src.policy_id IS NOT NULL
  AND src.claim_id IS NOT NULL
  AND src.addres_id IS NOT NULL
  -- IN lists take parentheses; square brackets are not valid SQL
  AND src.borough NOT IN ('NONE', 'NULL', 'NOT_FOUND');

In [None]:
%flink.ssql
-- Rejected-records table (sink): same fields as the raw source records,
-- prefixed with RECORD_REJECTED_CODE, written as JSON to the
-- 'kinesis-event-stream-rejected' Kinesis stream.
DROP TABLE IF EXISTS kinesis_rejected_hospital_patients_claims;

CREATE TABLE kinesis_rejected_hospital_patients_claims (
  -- rejection code assigned by the inserting query
  `RECORD_REJECTED_CODE`    STRING,

  -- record routing
  `partition_key`           STRING,

  -- patient
  `patient_id`              STRING,
  `name_prefix`             STRING,
  `first_name`              STRING,
  `last_name`               STRING,
  `date_of_birth`           STRING,
  `phone_number`            STRING,
  `email_id`                STRING,

  -- policy
  `policy_id`               STRING,
  `policy_start_date`       STRING,
  `policy_end_date`         STRING,
  `preimum_amount`          STRING,
  `coverage_limit`          STRING,

  -- address
  `addres_id`               STRING,
  `addressline`             STRING,
  `city`                    STRING,
  `state`                   STRING,
  `borough`                 STRING,

  -- claim
  `claim_id`                STRING,
  `claim_initialized_date`  STRING,
  `claim_request_amount`    STRING,
  `claim_status`            STRING,
  `claim_rejected_reason`   STRING
) WITH (
  'connector'  = 'kinesis',
  'stream'     = 'kinesis-event-stream-rejected',
  'format'     = 'json',
  'aws.region' = 'us-east-1'
);


In [None]:
%flink.ssql(type=insert)
-- Route records that are missing a key identifier to the rejected stream,
-- tagged with a code for the FIRST missing field:
--   101 = patient_id, 102 = policy_id, 103 = claim_id, 104 = addres_id.
--
-- NOTE(review): the WHERE clause only admits rows where one of those four
-- fields is NULL, so the '105' (zero claim amount) and '0' branches never
-- fire. If zero-amount claims should be rejected, the filter here and the
-- filter feeding the processed stream both need updating -- confirm intent.
INSERT INTO kinesis_rejected_hospital_patients_claims
SELECT
    CASE
        WHEN patient_id IS NULL THEN '101'
        WHEN policy_id IS NULL THEN '102'
        WHEN claim_id IS NULL THEN '103'
        WHEN addres_id IS NULL THEN '104'
        WHEN claim_request_amount = '0' THEN '105'
        ELSE '0'
    END AS RECORD_REJECTED_CODE,  -- CASE must be closed with END; alias matches the sink column
    *
FROM source_hospital_patients_claims_stream_table
WHERE patient_id IS NULL
   OR policy_id IS NULL
   OR claim_id IS NULL
   OR addres_id IS NULL;