In [0]:
# Storage account and service-principal identity used to reach ADLS Gen2.
storage_account_name = "healthcarercmra"
client_id = "55cb5f89-1a5c-41b2-a286-947e13e78c78"
tenant_id = "e1dd8e8f-9203-44c7-b497-48a69721f03b"

# The client secret must never live in notebook source: it ends up in revision
# history, exports, and anywhere the notebook is shared. Read it from a
# Databricks secret scope at run time instead.
# TODO(review): point `scope`/`key` at the secret scope that really holds this
# service principal's secret, and rotate the secret that was previously
# committed here in plaintext.
client_secret = dbutils.secrets.get(scope="healthcare-rcm", key="sp-client-secret")

# OAuth client-credentials settings for the ABFS driver.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": client_id,
    "fs.azure.account.oauth2.client.secret": client_secret,
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
}

# Apply every setting to the current Spark session.
for key, value in configs.items():
    spark.conf.set(key, value)

# Root URIs of the containers used by this notebook (each ends with "/").
bronze_path = f"abfss://bronze@{storage_account_name}.dfs.core.windows.net/"
print(bronze_path)

landing_path = f"abfss://landing@{storage_account_name}.dfs.core.windows.net/"
print(landing_path)

In [0]:
# Databricks notebook source
# Read Hospital A encounter data from the bronze container and expose it to the
# SQL cells as the `hosa_encounters` temp view.
# `bronze_path` comes from the configuration cell and already ends with "/",
# so this resolves to the same URI that used to be repeated here as a literal.
df_hosa = spark.read.parquet(f"{bronze_path}hosa/encounters")
df_hosa.createOrReplaceTempView("hosa_encounters")

In [0]:
# Read Hospital B encounter data from the bronze container and expose it to the
# SQL cells as the `hosb_encounters` temp view.
# `bronze_path` comes from the configuration cell and already ends with "/",
# so this resolves to the same URI that used to be repeated here as a literal.
df_hosb = spark.read.parquet(f"{bronze_path}hosb/encounters")
df_hosb.createOrReplaceTempView("hosb_encounters")

In [0]:
%sql
-- Preview every row of the Hospital A encounters temp view
SELECT *
FROM hosa_encounters

In [0]:
%sql
-- Preview every row of the Hospital B encounters temp view
SELECT *
FROM hosb_encounters

In [0]:
%sql
-- Stack the encounters of both hospitals into one `encounters` temp view.
-- UNION ALL keeps duplicates and pairs columns by position, not by name.
-- NOTE(review): this assumes both bronze datasets share the same column order -- confirm.
CREATE OR REPLACE TEMP VIEW encounters AS
WITH hospital_a AS (
  SELECT * FROM hosa_encounters
),
hospital_b AS (
  SELECT * FROM hosb_encounters
)
SELECT * FROM hospital_a
UNION ALL
SELECT * FROM hospital_b;



In [0]:
%sql
-- Preview the combined encounters from both hospitals
SELECT *
FROM encounters;

In [0]:
%sql
-- Build the `quality_checks` temp view on top of `encounters`:
--   * EncounterID becomes "<source EncounterID>-<datasource>"
--   * the source ID and source audit dates are kept under SRC_* names
--   * is_quarantined is TRUE when EncounterID or PatientID is missing
CREATE OR REPLACE TEMP VIEW quality_checks AS
SELECT
  CONCAT(enc.EncounterID, '-', enc.datasource) AS EncounterID,
  enc.EncounterID                              AS SRC_EncounterID,
  enc.PatientID,
  enc.EncounterDate,
  enc.EncounterType,
  enc.ProviderID,
  enc.DepartmentID,
  enc.ProcedureCode,
  enc.InsertedDate                             AS SRC_InsertedDate,
  enc.ModifiedDate                             AS SRC_ModifiedDate,
  enc.datasource,
  -- IS NULL always yields TRUE or FALSE, so the flag itself is never NULL
  (enc.EncounterID IS NULL OR enc.PatientID IS NULL) AS is_quarantined
FROM encounters AS enc;

In [0]:
%sql
-- Inspect the quality-checked rows whose datasource is 'hos-b'
SELECT *
FROM quality_checks
WHERE datasource = 'hos-b';

In [0]:
%sql
-- Silver Delta table holding encounter history (SCD2).
-- Created only when it does not already exist.
CREATE TABLE IF NOT EXISTS silver.encounters (
  -- Keys
  EncounterID        STRING,
  SRC_EncounterID    STRING,
  PatientID          STRING,

  -- Encounter attributes
  EncounterDate      DATE,
  EncounterType      STRING,
  ProviderID         STRING,
  DepartmentID       STRING,
  ProcedureCode      INT,

  -- Values carried over from the source system
  SRC_InsertedDate   DATE,
  SRC_ModifiedDate   DATE,
  datasource         STRING,

  -- Data-quality flag
  is_quarantined     BOOLEAN,

  -- Audit timestamps and current-version marker maintained by the MERGE cells
  audit_insertdate   TIMESTAMP,
  audit_modifieddate TIMESTAMP,
  is_current         BOOLEAN
)
USING DELTA;

In [0]:
%sql
-- SCD2 step 1: expire the current version of every encounter whose attributes changed.
--
-- A target row is matched on EncounterID while it is still the current version.
-- When any tracked column differs from the incoming row, the target row is marked
-- is_current = false and its audit_modifieddate is stamped.
--
-- Change detection uses the null-safe equality operator `<=>`. With plain `!=`,
-- a comparison involving NULL evaluates to NULL rather than TRUE, so a column
-- changing from NULL to a value (or a value to NULL) was silently treated as
-- "unchanged" and the stale row stayed current.
MERGE INTO silver.encounters AS target
USING quality_checks AS source
ON target.EncounterID = source.EncounterID AND target.is_current = true

WHEN MATCHED AND NOT (
    target.SRC_EncounterID     <=> source.SRC_EncounterID AND
    target.PatientID           <=> source.PatientID AND
    target.EncounterDate       <=> source.EncounterDate AND
    target.EncounterType       <=> source.EncounterType AND
    target.ProviderID          <=> source.ProviderID AND
    target.DepartmentID        <=> source.DepartmentID AND
    target.ProcedureCode       <=> source.ProcedureCode AND
    target.SRC_InsertedDate    <=> source.SRC_InsertedDate AND
    target.SRC_ModifiedDate    <=> source.SRC_ModifiedDate AND
    target.datasource          <=> source.datasource AND
    target.is_quarantined      <=> source.is_quarantined
)
THEN UPDATE SET
  target.is_current = false,
  target.audit_modifieddate = current_timestamp();

In [0]:
%sql
-- SCD2 step 2: insert a fresh current version for every incoming encounter that
-- has no current row in silver.encounters (brand-new encounters, plus those
-- whose previous version was expired by the preceding MERGE).
--
-- NOTE(review): a source row whose EncounterID is NULL can never satisfy `=`,
-- so it takes the NOT MATCHED branch on every run -- confirm whether repeated
-- inserts of such rows are acceptable.
MERGE INTO silver.encounters AS tgt
USING quality_checks AS src
ON tgt.EncounterID = src.EncounterID
   AND tgt.is_current = true

WHEN NOT MATCHED THEN
INSERT (
  EncounterID, SRC_EncounterID, PatientID,
  EncounterDate, EncounterType, ProviderID, DepartmentID, ProcedureCode,
  SRC_InsertedDate, SRC_ModifiedDate, datasource,
  is_quarantined,
  audit_insertdate, audit_modifieddate, is_current
)
VALUES (
  src.EncounterID, src.SRC_EncounterID, src.PatientID,
  src.EncounterDate, src.EncounterType, src.ProviderID, src.DepartmentID, src.ProcedureCode,
  src.SRC_InsertedDate, src.SRC_ModifiedDate, src.datasource,
  src.is_quarantined,
  -- both audit timestamps start at load time; the new row is the current version
  current_timestamp(), current_timestamp(), true
);


In [0]:
%sql
-- Per source encounter ID and hospital, count the silver rows that carry a
-- non-null PatientID, largest counts first.
SELECT
  SRC_EncounterID,
  datasource,
  COUNT(PatientID) AS encounter_count
FROM silver.encounters
GROUP BY
  SRC_EncounterID,
  datasource
ORDER BY encounter_count DESC;