In [0]:
# Storage account and service-principal identity used to reach ADLS Gen2.
storage_account_name = "healthcarercmra"
client_id = "55cb5f89-1a5c-41b2-a286-947e13e78c78"
tenant_id = "e1dd8e8f-9203-44c7-b497-48a69721f03b"

# The client secret must never live in notebook source: it ends up in version
# control, exports and revision history. Read it from a Databricks secret scope.
# NOTE(review): the scope/key names below are placeholders -- point them at the
# scope that holds this service principal's secret. The secret that was
# previously committed here should be treated as compromised and rotated.
client_secret = dbutils.secrets.get(scope="healthcare-rcm", key="sp-client-secret")

# OAuth client-credentials settings for the ABFS driver.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": client_id,
    "fs.azure.account.oauth2.client.secret": client_secret,
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
}

# Apply every setting to the active Spark session.
for key, value in configs.items():
    spark.conf.set(key, value)

# Root URLs of the containers used by this notebook.
bronze_path = f"abfss://bronze@{storage_account_name}.dfs.core.windows.net/"
print(bronze_path)

landing_path = f"abfss://landing@{storage_account_name}.dfs.core.windows.net/"
print(landing_path)

In [0]:
# Databricks notebook source
# Read Hospital A transactions from the bronze container and expose them to SQL.
# The location is built from bronze_path (defined in the configuration cell, which
# already ends with "/") so the storage account is named in one place only.
df_hosa = spark.read.parquet(f"{bronze_path}hosa/transactions")
df_hosa.createOrReplaceTempView("transactions_hosa")

In [0]:
# Read Hospital B transactions from the bronze container and expose them to SQL.
# The location is built from bronze_path (defined in the configuration cell, which
# already ends with "/") so the storage account is named in one place only.
df_hosb = spark.read.parquet(f"{bronze_path}hosb/transactions")
df_hosb.createOrReplaceTempView("transactions_hosb")

In [0]:
%sql
-- Inspect the Hospital A transactions registered as a temp view.
SELECT *
FROM transactions_hosa

In [0]:
%sql
-- Inspect the Hospital B transactions registered as a temp view.
SELECT *
FROM transactions_hosb

In [0]:
# Stack the Hospital A and Hospital B transactions, aligning columns by name.
df_merged = df_hosa.unionByName(df_hosb)

# Register the combined rows for the %sql cells that follow, then preview them.
df_merged.createOrReplaceTempView("transactions")
display(df_merged)

In [0]:
%sql
-- Quality-check view over the merged hospital transactions.
-- Adds a source-qualified key (TransactionID-datasource), keeps the original id as
-- SRC_TransactionID, and flags rows that are missing any mandatory field.
CREATE OR REPLACE TEMP VIEW quality_checks AS
SELECT
    -- keys
    CONCAT(TransactionID, '-', datasource) AS TransactionID,
    TransactionID AS SRC_TransactionID,
    EncounterID, PatientID, ProviderID, DeptID,
    -- dates
    VisitDate, ServiceDate, PaidDate,
    -- visit and billing details
    VisitType, Amount, AmountType, PaidAmount,
    ClaimID, PayorID, ProcedureCode, ICDCode,
    LineOfBusiness, MedicaidID, MedicareID,
    -- source audit columns
    InsertDate   AS SRC_InsertDate,
    ModifiedDate AS SRC_ModifiedDate,
    datasource,
    -- IS NULL never yields NULL, so this expression is always TRUE or FALSE
    (
        EncounterID   IS NULL
        OR PatientID     IS NULL
        OR TransactionID IS NULL
        OR VisitDate     IS NULL
    ) AS is_quarantined
FROM transactions;


In [0]:
%sql
-- Silver-layer Delta table for transactions; created only on the first run.
CREATE TABLE IF NOT EXISTS silver.transactions (
    -- keys
    TransactionID STRING,
    SRC_TransactionID STRING,
    EncounterID STRING,
    PatientID STRING,
    ProviderID STRING,
    DeptID STRING,
    -- dates
    VisitDate DATE,
    ServiceDate DATE,
    PaidDate DATE,
    -- visit and billing details
    VisitType STRING,
    Amount DOUBLE,
    AmountType STRING,
    PaidAmount DOUBLE,
    ClaimID STRING,
    PayorID STRING,
    ProcedureCode INTEGER,
    ICDCode STRING,
    LineOfBusiness STRING,
    MedicaidID STRING,
    MedicareID STRING,
    -- source audit columns
    SRC_InsertDate DATE,
    SRC_ModifiedDate DATE,
    datasource STRING,
    -- quality flag and SCD Type 2 bookkeeping
    is_quarantined BOOLEAN,
    audit_insertdate TIMESTAMP,
    audit_modifieddate TIMESTAMP,
    is_current BOOLEAN
)
USING DELTA;


In [0]:
%sql
-- SCD Type 2, step 1: expire the current silver row when the incoming row differs.
--
-- Columns are compared with the null-safe equality operator (<=>). A plain != yields
-- NULL whenever either side is NULL, so a field going from NULL to a value (or back)
-- would never count as a change and the stale row would stay current.
MERGE INTO silver.transactions AS target
USING quality_checks AS source
ON target.TransactionID = source.TransactionID
   AND target.is_current = true

-- The row is unchanged only if every tracked column is null-safe equal.
WHEN MATCHED AND NOT (
    target.SRC_TransactionID   <=> source.SRC_TransactionID   AND
    target.EncounterID         <=> source.EncounterID         AND
    target.PatientID           <=> source.PatientID           AND
    target.ProviderID          <=> source.ProviderID          AND
    target.DeptID              <=> source.DeptID              AND
    target.VisitDate           <=> source.VisitDate           AND
    target.ServiceDate         <=> source.ServiceDate         AND
    target.PaidDate            <=> source.PaidDate            AND
    target.VisitType           <=> source.VisitType           AND
    target.Amount              <=> source.Amount              AND
    target.AmountType          <=> source.AmountType          AND
    target.PaidAmount          <=> source.PaidAmount          AND
    target.ClaimID             <=> source.ClaimID             AND
    target.PayorID             <=> source.PayorID             AND
    target.ProcedureCode       <=> source.ProcedureCode       AND
    target.ICDCode             <=> source.ICDCode             AND
    target.LineOfBusiness      <=> source.LineOfBusiness      AND
    target.MedicaidID          <=> source.MedicaidID          AND
    target.MedicareID          <=> source.MedicareID          AND
    target.SRC_InsertDate      <=> source.SRC_InsertDate      AND
    target.SRC_ModifiedDate    <=> source.SRC_ModifiedDate    AND
    target.datasource          <=> source.datasource          AND
    target.is_quarantined      <=> source.is_quarantined
)
-- Close out the old version; the next cell inserts the new one as current.
THEN UPDATE SET
    target.is_current = false,
    target.audit_modifieddate = current_timestamp();


In [0]:
%sql
-- SCD Type 2, step 2: add a current row for every source transaction that has no
-- current counterpart in silver.transactions.
MERGE INTO silver.transactions AS target
USING quality_checks AS source
ON target.TransactionID = source.TransactionID
   AND target.is_current = true

WHEN NOT MATCHED THEN
INSERT (
    TransactionID, SRC_TransactionID,
    EncounterID, PatientID, ProviderID, DeptID,
    VisitDate, ServiceDate, PaidDate,
    VisitType, Amount, AmountType, PaidAmount,
    ClaimID, PayorID, ProcedureCode, ICDCode,
    LineOfBusiness, MedicaidID, MedicareID,
    SRC_InsertDate, SRC_ModifiedDate,
    datasource, is_quarantined,
    audit_insertdate, audit_modifieddate, is_current
)
VALUES (
    source.TransactionID, source.SRC_TransactionID,
    source.EncounterID, source.PatientID, source.ProviderID, source.DeptID,
    source.VisitDate, source.ServiceDate, source.PaidDate,
    source.VisitType, source.Amount, source.AmountType, source.PaidAmount,
    source.ClaimID, source.PayorID, source.ProcedureCode, source.ICDCode,
    source.LineOfBusiness, source.MedicaidID, source.MedicareID,
    source.SRC_InsertDate, source.SRC_ModifiedDate,
    source.datasource, source.is_quarantined,
    -- audit_insertdate, audit_modifieddate, is_current
    current_timestamp(), current_timestamp(), true
);


In [0]:
%sql
-- Spot-check a few rows of the silver table after the merge.
SELECT *
FROM silver.transactions
LIMIT 5;