In [0]:
from pyspark.sql.functions import *

In [0]:
%sql
-- Select emrcatalog as the current catalog for this session.
USE CATALOG emrcatalog;

In [0]:
# Read the bronze-layer transaction extracts of the two hospital sources.
df_hosa = spark.read.parquet('/mnt/bronze/hosa/transactions')
df_hosb = spark.read.parquet('/mnt/bronze/hosb/transactions')

# Stack both sources; unionByName aligns columns by name, not by position.
df_merged = df_hosa.unionByName(df_hosb)

# Register the combined frame so SQL cells can query it as `transactions`.
df_merged.createOrReplaceTempView('transactions')

# Small preview of the merged rows as a sanity check.
display(df_merged.limit(5))

In [0]:
%sql
-- Shape the merged `transactions` view for the silver load:
--   * transactionid becomes a source-qualified key: <transactionid>-<datasource>
--   * the original id and audit dates are kept under src_* names
--   * is_quarantined is true when any of transactionid, encounterid,
--     patientid or visitdate is missing
-- NOTE(review): CONCAT yields NULL when any input is NULL, so rows with a
-- missing transactionid or datasource end up with a NULL key -- confirm that
-- downstream merges handle this as intended.
CREATE OR REPLACE TEMP VIEW quailty_checks AS
SELECT
  CONCAT(transactionid, '-', datasource) AS transactionid,
  transactionid                          AS src_transactionid,
  encounterid,
  patientid,
  providerid,
  deptid,
  visitdate,
  servicedate,
  paiddate,
  visittype,
  amount,
  amounttype,
  paidamount,
  claimid,
  payorid,
  procedurecode,
  icdcode,
  lineofbusiness,
  medicaidid,
  medicareid,
  insertdate                             AS src_insertdate,
  modifieddate                           AS src_modifieddate,
  datasource,
  (
    transactionid IS NULL
    OR encounterid IS NULL
    OR patientid IS NULL
    OR visitdate IS NULL
  )                                      AS is_quarantined
FROM transactions;

In [0]:
%sql
-- Preview a handful of rows from the quality-check view.
SELECT *
FROM quailty_checks
LIMIT 5;

In [0]:
%sql
-- Silver-layer transactions table (Delta). Created only if it does not exist.
CREATE TABLE IF NOT EXISTS silver.transactions (
  -- keys and identifiers
  TransactionID      STRING,
  SRC_TransactionID  STRING,
  EncounterID        STRING,
  PatientID          STRING,
  ProviderID         STRING,
  DeptID             STRING,
  -- visit / payment attributes
  VisitDate          DATE,
  ServiceDate        DATE,
  PaidDate           DATE,
  VisitType          STRING,
  Amount             DOUBLE,
  AmountType         STRING,
  PaidAmount         DOUBLE,
  ClaimID            STRING,
  PayorID            STRING,
  ProcedureCode      INTEGER,
  ICDCode            STRING,
  LineOfBusiness     STRING,
  MedicaidID         STRING,
  MedicareID         STRING,
  -- source-system audit columns
  SRC_InsertDate     DATE,
  SRC_ModifiedDate   DATE,
  datasource         STRING,
  -- data-quality flag
  is_quarantined     BOOLEAN,
  -- load audit columns and current-version marker
  audit_insertdate   TIMESTAMP,
  audit_modifieddate TIMESTAMP,
  is_current         BOOLEAN
)
USING DELTA;

In [0]:
%sql
-- Step 1: mark the current silver row as historical (is_current = false) for
-- every transaction whose incoming version differs in any tracked column.
-- The replacement row is inserted by the separate insert-only merge.
MERGE INTO silver.transactions AS target
USING quailty_checks AS source
ON target.TransactionID = source.TransactionID AND target.is_current = true
WHEN MATCHED
AND -- Check whether any tracked column changed.
    -- `<=>` is null-safe equality: unlike `<>`, it never evaluates to NULL, so
    -- a value changing to or from NULL is correctly detected as a change.
(
  NOT (target.src_transactionid <=> source.src_transactionid) OR
  NOT (target.encounterid       <=> source.encounterid) OR
  NOT (target.patientid         <=> source.patientid) OR
  NOT (target.providerid        <=> source.providerid) OR
  NOT (target.deptid            <=> source.deptid) OR
  NOT (target.visitdate         <=> source.visitdate) OR
  NOT (target.servicedate       <=> source.servicedate) OR
  NOT (target.paiddate          <=> source.paiddate) OR
  NOT (target.visittype         <=> source.visittype) OR
  NOT (target.amount            <=> source.amount) OR
  NOT (target.amounttype        <=> source.amounttype) OR
  NOT (target.paidamount        <=> source.paidamount) OR
  NOT (target.claimid           <=> source.claimid) OR
  NOT (target.payorid           <=> source.payorid) OR
  NOT (target.procedurecode     <=> source.procedurecode) OR
  NOT (target.icdcode           <=> source.icdcode) OR
  NOT (target.lineofbusiness    <=> source.lineofbusiness) OR
  NOT (target.medicaidid        <=> source.medicaidid) OR
  NOT (target.medicareid        <=> source.medicareid) OR
  NOT (target.src_insertdate    <=> source.src_insertdate) OR
  NOT (target.src_modifieddate  <=> source.src_modifieddate) OR
  NOT (target.datasource        <=> source.datasource) OR
  NOT (target.is_quarantined    <=> source.is_quarantined)
)
THEN UPDATE SET
  target.is_current = false,
  target.audit_modifieddate = current_timestamp(); -- end the record


In [0]:
%sql
-- Step 2: insert a current row (is_current = true) for every source record that
-- has no current counterpart in silver -- i.e. brand-new transactions and
-- transactions whose previous version is no longer flagged as current.
MERGE INTO silver.transactions AS target
USING quailty_checks AS source
ON target.TransactionID = source.TransactionID AND target.is_current = true
WHEN NOT MATCHED THEN
INSERT
(
  TransactionID,
  SRC_TransactionID,
  EncounterID,
  PatientID,
  ProviderID,
  DeptID,
  VisitDate,
  ServiceDate,
  PaidDate,
  VisitType,
  Amount,
  AmountType,
  PaidAmount,
  ClaimID,
  PayorID,
  ProcedureCode,
  ICDCode,
  LineOfBusiness,
  MedicaidID,
  MedicareID,
  SRC_InsertDate,
  SRC_ModifiedDate,
  datasource,
  is_quarantined,
  audit_insertdate,
  audit_modifieddate,
  is_current
)
VALUES
(
  source.TransactionID,
  source.src_transactionid,
  source.encounterid,
  source.patientid,
  source.providerid,
  source.deptid,
  source.visitdate,
  source.servicedate,
  source.paiddate,
  source.visittype,
  source.amount,
  source.amounttype,
  source.paidamount,
  source.claimid,
  source.payorid,
  source.procedurecode,
  source.icdcode,
  source.lineofbusiness,
  source.medicaidid,
  source.medicareid,
  source.src_insertdate,
  source.src_modifieddate,
  source.datasource,
  source.is_quarantined,
  current_timestamp(),  -- audit_insertdate
  current_timestamp(),  -- audit_modifieddate
  true                  -- is_current
);

In [0]:
%sql
-- Preview a few rows of the loaded silver table.
SELECT *
FROM silver.transactions
LIMIT 10;