In [0]:
# Read the bronze NPI extract (Parquet) and register it as the temp view
# `npi_extract` so the %sql cells below can query it by name.
bronze_npi_path = "/mnt/rcmabhi/bronze/npi_extract"
df = spark.read.format("parquet").load(bronze_npi_path)
df.createOrReplaceTempView("npi_extract")

In [0]:
%sql
-- Silver-layer history table for NPI records.
-- The table is declared USING DELTA explicitly because the load cell relies on
-- MERGE INTO, which needs a Delta table; this avoids depending on the
-- workspace/runtime default table format.
CREATE TABLE IF NOT EXISTS silver.npi_extract (
  npi_id STRING,             -- key the MERGE matches staged rows on
  first_name STRING,
  last_name STRING,
  position STRING,
  organisation_name STRING,
  last_updated STRING,       -- kept as STRING; compared as text by the MERGE
  inserted_date DATE,        -- set to current_date() when a version is inserted
  updated_date DATE,         -- set to current_date() on insert and again when the version is expired
  is_current_flag BOOLEAN    -- TRUE for the active version of an npi_id, FALSE once superseded
)
USING DELTA

In [0]:
%sql
-- COMMAND ----------
-- SCD Type 2 load of silver.npi_extract from the staging view `npi_extract`.
--
-- A plain "MERGE ... ON npi_id" can only take ONE action per staged row: a
-- changed row matches the current version and expires it, but is then never
-- inserted as the new version (it did not fall into NOT MATCHED). To expire the
-- old version AND insert the new one in the same statement, every changed row is
-- staged twice:
--   * once keyed by its npi_id  -> matches the current version and expires it
--   * once with a NULL merge_key -> can never match, so it is inserted as the
--                                   new current version
-- Brand-new npi_ids appear only in the keyed branch, find no current row, and
-- are inserted. Unchanged rows match but fail the change predicate: no action.
--
-- NOTE(review): like the original statement, this assumes at most one staged
-- row per npi_id; duplicates would make several source rows match one target row.
MERGE INTO silver.npi_extract AS tgt
USING (
  -- Keyed branch: all staged rows, matched against the current version by npi_id.
  SELECT
    stg.npi_id AS merge_key,
    stg.npi_id,
    stg.first_name,
    stg.last_name,
    stg.position,
    stg.organisation_name,
    stg.last_updated
  FROM npi_extract AS stg

  UNION ALL

  -- Insert branch: staged rows whose attributes differ from the current version.
  -- merge_key is NULL so the ON clause never matches and the row is inserted.
  SELECT
    NULL AS merge_key,
    stg.npi_id,
    stg.first_name,
    stg.last_name,
    stg.position,
    stg.organisation_name,
    stg.last_updated
  FROM npi_extract AS stg
  INNER JOIN silver.npi_extract AS cur
    ON cur.npi_id = stg.npi_id
   AND cur.is_current_flag = TRUE
  WHERE
    -- Same change predicate as the WHEN MATCHED clause below; keep the two in sync.
    COALESCE(cur.first_name, '')        <> COALESCE(stg.first_name, '') OR
    COALESCE(cur.last_name, '')         <> COALESCE(stg.last_name, '') OR
    COALESCE(cur.position, '')          <> COALESCE(stg.position, '') OR
    COALESCE(cur.organisation_name, '') <> COALESCE(stg.organisation_name, '') OR
    COALESCE(cur.last_updated, '1900-01-01') <> COALESCE(stg.last_updated, '1900-01-01')
) AS src
ON tgt.npi_id = src.merge_key AND tgt.is_current_flag = TRUE
WHEN MATCHED
  AND (
    -- COALESCE makes the comparison null-safe; NULL and the default value are
    -- treated as equal.
    COALESCE(tgt.first_name, '')        <> COALESCE(src.first_name, '') OR
    COALESCE(tgt.last_name, '')         <> COALESCE(src.last_name, '') OR
    COALESCE(tgt.position, '')          <> COALESCE(src.position, '') OR
    COALESCE(tgt.organisation_name, '') <> COALESCE(src.organisation_name, '') OR
    COALESCE(tgt.last_updated, '1900-01-01') <> COALESCE(src.last_updated, '1900-01-01')
  )
THEN UPDATE SET
  tgt.is_current_flag = FALSE,      -- Mark the old version as no longer current
  tgt.updated_date = current_date() -- Record the date it became historical

WHEN NOT MATCHED THEN INSERT (
  npi_id,
  first_name,
  last_name,
  position,
  organisation_name,
  last_updated,
  inserted_date,
  updated_date,
  is_current_flag
)
VALUES (
  src.npi_id,
  src.first_name,
  src.last_name,
  src.position,
  src.organisation_name,
  src.last_updated,
  current_date(),  -- Set inserted_date for the new version
  current_date(),  -- Set updated_date for the new version (initially same as inserted_date)
  TRUE             -- Mark this new record/version as current
);