### **Implementing Slowly Changing Dimensions Type 2 and Common Data Modeling for Patients Data**

In [0]:
from pyspark.sql import functions as f

#Reading patients data from Hospital-1
patients_hospital1 = spark.read.parquet('/mnt/bronze/Hospital-1/patients')
#patients_hospital1 = spark.read.parquet('/mnt/bronze/Hospital-1/Archive/2025/2/18/patients')
#Reading patients data from Hospital-2
patients_hospital2 = spark.read.parquet('/mnt/bronze/Hospital-2/patients')
#patients_hospital2 = spark.read.parquet('/mnt/bronze/Hospital-2/Archive/2025/2/18/patients')

#patients_hospital1=patients_hospital1.withColumn('datasource',f.lit('Hospital-1'))
#patients_hospital2=patients_hospital2.withColumn('datasource',f.lit('Hospital-2'))

patients_hospital1.createOrReplaceTempView('patients_hospital1')
patients_hospital2.createOrReplaceTempView('patients_hospital2')

In [0]:
%sql
-- Preview the raw Hospital-1 patient rows as loaded from bronze.
SELECT *
FROM patients_hospital1;

In [0]:
%sql
-- Show the column names and types of the Hospital-1 view.
DESCRIBE TABLE patients_hospital1;

In [0]:
%sql
-- Show the column names and types of the Hospital-2 view.
DESCRIBE TABLE patients_hospital2;

In [0]:
%sql
-- Preview the raw Hospital-2 patient rows as loaded from bronze.
SELECT *
FROM patients_hospital2;

In [0]:
%sql
-- Common data model for patients: both hospital feeds are projected onto one
-- shared column layout and stacked with UNION ALL (duplicates are kept).
-- Patient_Key is built as '<SRC_PatientID>-<datasource>'.
-- NOTE(review): Spark's concat() returns NULL when any argument is NULL, so a
-- row with a NULL SRC_PatientID or datasource gets a NULL Patient_Key.
CREATE OR REPLACE TEMP VIEW cmd_patients AS
SELECT
  concat(SRC_PatientID, '-', datasource) AS Patient_Key,
  *
FROM (
  -- Hospital-1 already uses the target column names, except for the id.
  SELECT
    PatientID    AS SRC_PatientID,
    FirstName,
    LastName,
    MiddleName,
    SSN,
    PhoneNumber,
    Gender,
    DOB,
    Address,
    ModifiedDate,
    datasource
  FROM patients_hospital1

  UNION ALL

  -- Hospital-2 uses abbreviated names; rename them to the common layout.
  SELECT
    ID           AS SRC_PatientID,
    F_Name       AS FirstName,
    L_Name       AS LastName,
    M_Name       AS MiddleName,
    SSN,
    PhoneNumber,
    Gender,
    DOB,
    Address,
    Updated_Date AS ModifiedDate,
    datasource
  FROM patients_hospital2
) AS unified

In [0]:
%sql
-- Show the column names and types of the unified patient view.
DESCRIBE TABLE cmd_patients;

In [0]:
%sql
-- Adds the is_quarantined flag to every unified patient row. A row is flagged
-- when its source id, date of birth or first name is missing, or when the
-- first name is the literal text 'null' in any letter case.
-- The predicate can never evaluate to NULL: lower(FirstName) = 'null' is NULL
-- only when FirstName IS NULL, and that branch is then TRUE.
CREATE OR REPLACE TEMP VIEW quality_checks AS
SELECT
  *,
  (
       SRC_PatientID IS NULL
    OR DOB IS NULL
    OR FirstName IS NULL
    OR lower(FirstName) = 'null'
  ) AS is_quarantined
FROM cmd_patients

In [0]:
%sql
-- Show the column names and types of the quality-checked view.
DESCRIBE TABLE quality_checks;

In [0]:
%sql
-- Inspect the quality-check result with quarantined rows (TRUE) listed first.
SELECT *
FROM quality_checks
ORDER BY is_quarantined DESC;

In [0]:
%sql
-- Silver patient dimension. Created only when it does not exist yet, so
-- re-running this cell leaves an existing table and its rows untouched.
CREATE TABLE IF NOT EXISTS healthcarerevenuecyclemanagement_databricks.silver.patients (
  -- Key built in cmd_patients as '<SRC_PatientID>-<datasource>'
  Patient_Key       STRING,

  -- Attributes carried over from the source systems
  SRC_PatientID     STRING,
  FirstName         STRING,
  LastName          STRING,
  MiddleName        STRING,
  SSN               STRING,
  PhoneNumber       STRING,
  Gender            STRING,
  DOB               DATE,
  Address           STRING,
  SRC_ModifiedDate  DATE,
  datasource        STRING,

  -- Flag computed in quality_checks
  is_quarantined    BOOLEAN,

  -- Row-version bookkeeping maintained by the MERGE cells
  inserted_date     TIMESTAMP,
  modified_date     TIMESTAMP,
  is_current        BOOLEAN
)
USING DELTA;

In [0]:
%sql
-- SCD Type 2, expire step: for every incoming patient whose current silver row
-- differs in any tracked attribute, mark that row as no longer current.
-- Once is_current is false, the key has no current row, so a MERGE that
-- matches on Patient_Key AND is_current = true treats it as NOT MATCHED.
--
-- Fixes compared with the previous version of this cell:
--   * the UPDATE now clears is_current (it used to overwrite is_quarantined,
--     which left the old version current and corrupted its quality flag);
--   * attributes are compared with the null-safe operator <=>, because a plain
--     <> yields NULL when either side is NULL and so missed changes such as
--     NULL -> 'value' or 'value' -> NULL.
--
-- NOTE(review): SRC_ModifiedDate and DOB are DATE in the target; if the source
-- columns are TIMESTAMPs with a time part they may never compare equal --
-- confirm the source types.
MERGE INTO healthcarerevenuecyclemanagement_databricks.silver.patients AS target
USING quality_checks AS source
ON target.Patient_Key = source.Patient_Key
AND target.is_current = true
WHEN MATCHED
AND (
     NOT (target.SRC_PatientID    <=> source.SRC_PatientID)
  OR NOT (target.FirstName        <=> source.FirstName)
  OR NOT (target.LastName         <=> source.LastName)
  OR NOT (target.MiddleName       <=> source.MiddleName)
  OR NOT (target.SSN              <=> source.SSN)
  OR NOT (target.PhoneNumber      <=> source.PhoneNumber)
  OR NOT (target.Gender           <=> source.Gender)
  OR NOT (target.DOB              <=> source.DOB)
  OR NOT (target.Address          <=> source.Address)
  OR NOT (target.SRC_ModifiedDate <=> source.ModifiedDate)
  OR NOT (target.datasource       <=> source.datasource)
  OR NOT (target.is_quarantined   <=> source.is_quarantined))
THEN UPDATE SET
    -- Retire the superseded version; its attribute values are left as history.
    target.is_current = false,
    target.modified_date = current_timestamp()

In [0]:
%sql
-- SCD Type 2, insert step: add a current row for every incoming patient that
-- has no row with is_current = true in the target. That covers brand-new
-- patient keys and keys whose current version has been expired.
--
-- The target is referenced by its full three-part name, matching the
-- CREATE TABLE cell. The previous unqualified `silver.patients` resolved
-- against whatever catalog was current in the session.
MERGE INTO healthcarerevenuecyclemanagement_databricks.silver.patients AS target
USING quality_checks AS source
ON target.Patient_Key = source.Patient_Key
AND target.is_current = TRUE
WHEN NOT MATCHED
THEN INSERT (
  Patient_Key,
  SRC_PatientID,
  FirstName,
  LastName,
  MiddleName,
  SSN,
  PhoneNumber,
  Gender,
  DOB,
  Address,
  SRC_ModifiedDate,
  datasource,
  is_quarantined,
  inserted_date,
  modified_date,
  is_current)
VALUES (
  source.Patient_Key,
  source.SRC_PatientID,
  source.FirstName,
  source.LastName,
  source.MiddleName,
  source.SSN,
  source.PhoneNumber,
  source.Gender,
  source.DOB,
  source.Address,
  source.ModifiedDate,      -- stored in SRC_ModifiedDate
  source.datasource,
  source.is_quarantined,
  current_timestamp(),      -- inserted_date
  current_timestamp(),      -- modified_date
  true                      -- the new row is the current version
)

In [0]:
%sql
-- Count the stored rows per Patient_Key, highest count first. Any count above
-- 1 means the key has more than one version in the table.
-- The table is referenced by its full three-part name, matching the
-- CREATE TABLE cell, so the result does not depend on the session's catalog.
SELECT
  Patient_Key,
  count(*) AS version_count
FROM healthcarerevenuecyclemanagement_databricks.silver.patients
GROUP BY Patient_Key
ORDER BY version_count DESC

In [0]:
%sql
-- Inspect every row of the silver patients table, all versions included.
SELECT *
FROM healthcarerevenuecyclemanagement_databricks.silver.patients;

In [0]:
%sql  
-- Show the column names and types of the silver patients table.
DESCRIBE TABLE healthcarerevenuecyclemanagement_databricks.silver.patients;