In [0]:
# List the entries under the bronze mount point to confirm the source folders are present.
dbutils.fs.ls("/mnt/rcmabhi/bronze")

In [0]:
# Bronze-layer location of the ICD codes data; read by the parquet load in a later cell.
path = "/mnt/rcmabhi/bronze/icd_codes"

In [0]:
# Load the bronze ICD codes parquet data found at `path` into a DataFrame.
df = spark.read.format("parquet").load(path)

In [0]:
# Expose the DataFrame to SQL as the temp view `staging_icd_codes`;
# the MERGE cell below uses this view as its source.
df.createOrReplaceTempView("staging_icd_codes")

In [0]:
%sql
-- Silver-layer ICD code table holding versioned rows (one row per version of a code).
-- The MERGE cell in this notebook fills both date columns from current_timestamp();
-- because they are declared DATE here, they hold day-level values.
CREATE TABLE IF NOT EXISTS silver.icd_codes (
  icd_code          STRING,   -- key used by the MERGE to match staged rows
  icd_code_type     STRING,   -- tracked attribute: a change starts a new version
  code_description  STRING,   -- tracked attribute: a change starts a new version
  inserted_date     DATE,     -- set when the row version is inserted
  updated_date      DATE,     -- set on insert, and again when the version is expired
  is_current_flag   BOOLEAN   -- TRUE for the active version of a code
)

In [0]:
%sql
-- SCD Type 2 load of silver.icd_codes from the staging_icd_codes temp view.
--
-- Target structure, as declared by the CREATE TABLE cell in this notebook:
-- (
--   icd_code STRING,
--   icd_code_type STRING,
--   code_description STRING,
--   inserted_date DATE,
--   updated_date DATE,        -- also records when an expired version was closed
--   is_current_flag BOOLEAN
-- )
-- The date columns are DATE, so the current_timestamp() values written below are
-- stored at day precision.
--
-- How a single MERGE both expires and re-inserts a changed code:
--   * Every staged row is fed in once with merge_key = icd_code. It either matches
--     the current target row (and expires it if a tracked column differs) or, when
--     no current row exists, is inserted as a brand-new code.
--   * Staged rows that differ from the current target row are fed in a second time
--     with merge_key = NULL. NULL never satisfies the ON condition, so that copy
--     always takes the NOT MATCHED branch and becomes the new current version.
-- Without the second copy a changed code would only be expired, leaving it with no
-- current row until the next run.
--
-- Tracked columns are compared with the null-safe operator <=> so that a value
-- changing to or from NULL counts as a change (plain <> evaluates to NULL there).
--
-- NOTE(review): assumes staging_icd_codes holds at most one row per icd_code;
-- MERGE fails when several source rows match the same target row -- confirm upstream.

MERGE INTO silver.icd_codes AS tgt
USING (
  -- Copy 1: all staged rows, keyed by icd_code.
  SELECT
    stg.icd_code AS merge_key,
    stg.icd_code,
    stg.icd_code_type,
    stg.code_description
  FROM staging_icd_codes AS stg

  UNION ALL

  -- Copy 2: only staged rows whose tracked columns differ from the current
  -- version; the NULL key forces them down the INSERT branch.
  SELECT
    NULL AS merge_key,
    stg.icd_code,
    stg.icd_code_type,
    stg.code_description
  FROM staging_icd_codes AS stg
  INNER JOIN silver.icd_codes AS cur
    ON cur.icd_code = stg.icd_code
  WHERE cur.is_current_flag = TRUE
    AND (
      NOT (cur.icd_code_type    <=> stg.icd_code_type) OR
      NOT (cur.code_description <=> stg.code_description)
    )
) AS src
ON tgt.icd_code = src.merge_key AND tgt.is_current_flag = TRUE
WHEN MATCHED
  AND (
    -- Columns whose change triggers a new SCD Type 2 version.
    -- Keep this list in sync with the WHERE clause of copy 2 above.
    NOT (tgt.icd_code_type    <=> src.icd_code_type) OR
    NOT (tgt.code_description <=> src.code_description)
  )
THEN UPDATE SET
  tgt.is_current_flag = FALSE,            -- Mark the old version as no longer current
  tgt.updated_date = current_timestamp()  -- Record when the old version was closed

WHEN NOT MATCHED THEN INSERT (
    icd_code,
    icd_code_type,
    code_description,
    inserted_date,
    updated_date,
    is_current_flag
)
VALUES (
    src.icd_code,
    src.icd_code_type,
    src.code_description,
    current_timestamp(), -- inserted_date for the new version
    current_timestamp(), -- updated_date starts out equal to inserted_date
    TRUE                 -- The new record/version is the current one
);