In [0]:
from pyspark.sql.functions import *

In [0]:
%sql
-- Select emrcatalog as the current catalog for the SQL cells that follow,
-- so two-part names such as silver.encounters are resolved inside it.
USE CATALOG emrcatalog;

In [0]:
# Read the bronze-layer encounter extracts, one parquet location per hospital feed.
df_hosa = spark.read.format('parquet').load('/mnt/bronze/hosa/encounters')
df_hosb = spark.read.format('parquet').load('/mnt/bronze/hosb/encounters')

# Stack both feeds into one DataFrame; unionByName aligns columns by name
# rather than by position.
df_merged = df_hosa.unionByName(df_hosb)

# Small preview only -- avoid rendering the full encounter set in the notebook.
display(df_merged.limit(5))

# Expose the combined data to the SQL cells under the name `encounters`.
df_merged.createOrReplaceTempView('encounters')

In [0]:
%sql
-- Project the merged bronze `encounters` view into the column layout consumed by
-- the silver merges, adding a composite key and a quarantine flag.
CREATE OR REPLACE TEMP VIEW quality_checks AS
SELECT
  -- Composite key: source encounter id plus the originating data source.
  concat(encounterid,'-',datasource) AS encounter_id,
  encounterid                        AS src_encounter_id,
  patientid,
  encounterdate,
  encountertype,
  providerid,
  departmentid,
  procedurecode,
  inserteddate                       AS src_InsertedDate,
  modifieddate                       AS src_modifieddate,
  datasource,
  -- TRUE when either identifier is missing, FALSE otherwise (IS NULL never
  -- yields NULL, so the flag itself is always populated).
  (EncounterID IS NULL OR PatientID IS NULL) AS is_quarantined
FROM encounters;

In [0]:
%sql
-- Target Delta table for encounters. Created only on the first run; later runs
-- leave an existing table untouched.
CREATE TABLE IF NOT EXISTS silver.encounters (
  -- Keys
  EncounterID        STRING,     -- filled from quality_checks.encounter_id by the insert merge
  SRC_EncounterID    STRING,
  PatientID          STRING,

  -- Encounter attributes
  EncounterDate      DATE,
  EncounterType      STRING,
  ProviderID         STRING,
  DepartmentID       STRING,
  ProcedureCode      INT,

  -- Source-system bookkeeping
  SRC_InsertedDate   DATE,
  SRC_ModifiedDate   DATE,
  datasource         STRING,

  -- Data-quality flag and SCD Type 2 audit columns
  is_quarantined     BOOLEAN,
  audit_insertdate   TIMESTAMP,
  audit_modifieddate TIMESTAMP,
  is_current         BOOLEAN
)
USING DELTA;

In [0]:
%sql
-- SCD Type 2, step 1: expire the current version of every encounter whose
-- attributes differ from the incoming row in quality_checks. Only the flag and
-- the audit timestamp are touched here; the replacement version is added by the
-- separate insert merge.
MERGE INTO silver.encounters AS target
USING quality_checks AS source
ON target.EncounterID = source.encounter_id AND target.is_current = true
WHEN MATCHED AND
(
  -- Null-safe change detection. A plain `a != b` evaluates to NULL when either
  -- side is NULL, so a value changing from NULL to non-NULL (or back) would not
  -- satisfy the condition and the stale version would remain current.
  -- `a <=> b` always returns TRUE/FALSE, treating two NULLs as equal.
  NOT (source.src_encounter_id <=> target.SRC_EncounterID) OR
  NOT (source.patientid        <=> target.patientid) OR
  NOT (source.EncounterDate    <=> target.EncounterDate) OR
  NOT (source.EncounterType    <=> target.EncounterType) OR
  NOT (source.ProviderID       <=> target.ProviderID) OR
  NOT (source.DepartmentID     <=> target.DepartmentID) OR
  NOT (source.ProcedureCode    <=> target.ProcedureCode) OR
  NOT (source.SRC_InsertedDate <=> target.SRC_InsertedDate) OR
  NOT (source.SRC_ModifiedDate <=> target.SRC_ModifiedDate) OR
  NOT (source.datasource       <=> target.datasource) OR
  NOT (source.is_quarantined   <=> target.is_quarantined)
)
THEN UPDATE SET
  target.is_current = false,
  target.audit_modifieddate = current_timestamp();

In [0]:
%sql
-- SCD Type 2, step 2: add a fresh current version for every incoming encounter
-- that has no current row in silver.encounters. That covers keys never seen
-- before as well as keys whose previous version is no longer flagged current.
--
-- NOTE(review): encounter_id is built with concat(), so it is NULL when the
-- source id is NULL. A NULL key cannot satisfy the equality below, which means
-- such rows are treated as "not matched" on every run -- confirm whether
-- repeated inserts of those quarantined rows are intended.
MERGE INTO silver.encounters AS tgt
USING quality_checks AS src
ON tgt.EncounterID = src.encounter_id AND tgt.is_current = true
WHEN NOT MATCHED THEN INSERT (
  EncounterID,
  SRC_EncounterID,
  PatientID,
  EncounterDate,
  EncounterType,
  ProviderID,
  DepartmentID,
  ProcedureCode,
  SRC_InsertedDate,
  SRC_ModifiedDate,
  datasource,
  is_quarantined,
  audit_insertdate,
  audit_modifieddate,
  is_current
) VALUES (
  src.encounter_id,
  src.src_encounter_id,
  src.patientid,
  src.EncounterDate,
  src.EncounterType,
  src.ProviderID,
  src.DepartmentID,
  src.ProcedureCode,
  src.SRC_InsertedDate,
  src.SRC_ModifiedDate,
  src.datasource,
  src.is_quarantined,
  current_timestamp(),   -- audit_insertdate
  current_timestamp(),   -- audit_modifieddate
  true                   -- a newly inserted version is always the current one
);

In [0]:
%sql
-- Sanity check: number of rows with a non-NULL patientid per source encounter
-- id and data source, largest first. There is no is_current filter, so every
-- stored version of an encounter is counted.
SELECT
  SRC_EncounterID,
  datasource,
  count(patientid)
FROM silver.encounters
GROUP BY ALL
ORDER BY 3 DESC;