In [0]:
# Load the raw CPT code extract that the bronze layer stores as Parquet.
bronze_cpt_df = spark.read.parquet("/mnt/rcmabhi/bronze/cpt_codes")

# Register the DataFrame under a view name so the %sql cells can query it.
bronze_cpt_df.createOrReplaceTempView("bronze_cpt_codes")

In [0]:
%sql
-- Data-quality view over the bronze CPT codes.
-- A row is flagged as quarantined when its code or its description is missing.
-- IS NULL always yields TRUE/FALSE, so the predicate can be projected directly.
CREATE OR REPLACE TEMP VIEW quality_checks AS
SELECT
  cpt_codes,
  procedure_code_category,
  procedure_code_descriptions,
  code_status,
  (cpt_codes IS NULL OR procedure_code_descriptions IS NULL) AS is_quarantined
FROM bronze_cpt_codes

In [0]:
%sql
-- Silver-layer Delta table holding versioned CPT code rows.
-- Column order matters: the load cell inserts into this table positionally.
CREATE TABLE IF NOT EXISTS silver.cptcodes (
  cpt_codes                   STRING,
  procedure_code_category     STRING,
  procedure_code_descriptions STRING,
  code_status                 STRING,
  is_quarantined              BOOLEAN,    -- TRUE when the code or its description was NULL in the source
  audit_insertdate            TIMESTAMP,  -- set when the row version is inserted
  audit_modifieddate          TIMESTAMP,  -- set on insert and again when the version is expired
  is_current                  BOOLEAN     -- TRUE only for the live version of a code
) USING DELTA;

In [0]:
%sql
-- Step 1 of the versioned load: expire the current row of every code whose
-- attributes differ from the incoming quality_checks row. The replacement
-- version is written by the INSERT cell that follows.
--
-- Attribute comparison uses the null-safe operator <=> because `a != b`
-- evaluates to NULL (not TRUE) when either side is NULL, which would silently
-- skip changes such as a category going from NULL to a value or vice versa.
--
-- Rows whose cpt_codes is NULL never satisfy the ON clause (NULL = NULL is not
-- true), so they are never expired by this statement.
MERGE INTO silver.cptcodes AS target
USING quality_checks AS source
ON target.cpt_codes = source.cpt_codes AND target.is_current = true
WHEN MATCHED AND NOT (
    target.procedure_code_category <=> source.procedure_code_category AND
    target.procedure_code_descriptions <=> source.procedure_code_descriptions AND
    target.code_status <=> source.code_status AND
    target.is_quarantined <=> source.is_quarantined
) THEN
  UPDATE SET
    target.is_current = false,
    target.audit_modifieddate = current_timestamp()

In [0]:
%sql
-- Step 2 of the versioned load: insert every source row that has no current
-- version in silver.cptcodes. That covers brand-new codes and codes whose
-- previous version was just marked is_current = false.
--
-- NULL handling:
--   * The key is joined with the null-safe operator <=>. With plain `=`, a
--     source row whose cpt_codes is NULL can never match, so it would be
--     re-inserted on every run of the notebook.
--   * A NULL key does not identify a row, so NULL-keyed rows are only treated
--     as "already loaded" when the remaining attributes are identical too.
--     (is_quarantined is always TRUE for these rows, so it is not compared.)
--   * "No match" is detected via target.is_current, which the join condition
--     pins to TRUE for every matched row; target.cpt_codes cannot be used
--     because it may legitimately be NULL on a matched row.
INSERT INTO silver.cptcodes
SELECT
  source.cpt_codes,
  source.procedure_code_category,
  source.procedure_code_descriptions,
  source.code_status,
  source.is_quarantined,
  current_timestamp() AS audit_insertdate,
  current_timestamp() AS audit_modifieddate,
  true AS is_current
FROM quality_checks AS source
LEFT JOIN silver.cptcodes AS target
  ON source.cpt_codes <=> target.cpt_codes
  AND target.is_current = true
  AND (
    source.cpt_codes IS NOT NULL
    OR (
      source.procedure_code_category <=> target.procedure_code_category
      AND source.procedure_code_descriptions <=> target.procedure_code_descriptions
      AND source.code_status <=> target.code_status
    )
  )
-- Keep only source rows that found no current version in the target
WHERE target.is_current IS NULL;