In [0]:
from pyspark.sql import SparkSession, functions as f

# Load every CPT-code CSV from the landing zone; the first row of each file
# supplies the column names.
cptcodes_df = spark.read.csv("/mnt/landing/cptcodes/*.csv", header=True)

# Normalise the header names (spaces -> underscores, lower-case) and apply
# them all at once with a single positional rename.
normalized_names = [name.replace(" ", "_").lower() for name in cptcodes_df.columns]
cptcodes_df = cptcodes_df.toDF(*normalized_names)

# Expose the renamed frame to the %sql cells under the name `cptcodes`.
cptcodes_df.createOrReplaceTempView("cptcodes")
display(cptcodes_df)

In [0]:
# Persist the renamed CPT codes to the bronze path as Parquet, replacing
# whatever a previous run wrote there.
bronze_writer = cptcodes_df.write.format("parquet").mode("overwrite")
bronze_writer.save("/mnt/bronze/cpt_codes")

In [0]:
%sql
-- Data-quality view over `cptcodes`: passes the four source columns through
-- and adds `is_quarantined`, which is TRUE when the CPT code or its
-- description is missing and FALSE otherwise.
CREATE OR REPLACE TEMP VIEW quality_checks AS
SELECT
  c.procedure_code_category,
  c.cpt_codes,
  c.procedure_code_descriptions,
  c.code_status,
  -- IS NULL always yields TRUE/FALSE (never NULL), so the predicate itself
  -- is the flag.
  (c.cpt_codes IS NULL OR c.procedure_code_descriptions IS NULL) AS is_quarantined
FROM cptcodes AS c

In [0]:
%sql
-- Preview the quality-check view (all of its columns, in view order).
SELECT
  procedure_code_category,
  cpt_codes,
  procedure_code_descriptions,
  code_status,
  is_quarantined
FROM quality_checks

In [0]:
%sql
-- Silver-layer history table for CPT codes. One row per version of a code;
-- the SCD Type 2 merge in this notebook maintains the audit/currency columns.
-- The format is pinned to Delta because MERGE INTO requires a Delta table;
-- without USING the format depends on the workspace/runtime default.
CREATE TABLE IF NOT EXISTS silver.cpt_codes(
  procedure_code_category STRING,
  cpt_codes STRING,
  procedure_code_descriptions STRING,
  code_status STRING,
  is_quarantined BOOLEAN,       -- quality flag carried over from quality_checks
  audit_insertdate TIMESTAMP,   -- set when the row version is inserted
  audit_modifieddate TIMESTAMP, -- set on insert and again when the version is expired
  is_current BOOLEAN            -- TRUE on insert; set to FALSE when superseded
)
USING DELTA

In [0]:
%sql
-- SCD Type 2 load of silver.cpt_codes from quality_checks, in a single pass:
--   * a brand-new code is inserted as the current version;
--   * a code whose attributes changed has its current row expired
--     (is_current = FALSE) AND its new version inserted in the same MERGE;
--   * an unchanged code is left untouched.
--
-- A plain key-matched MERGE can only do one thing per source row: a changed
-- code matches its current row, so it would be expired but never re-inserted
-- until a later run. The staged source below therefore feeds each changed row
-- twice: once keyed by its code (drives the expiry) and once with a NULL
-- merge_key (can never match, so it reaches the INSERT branch).
--
-- Change detection uses the null-safe operator <=> so that a value moving
-- to or from NULL counts as a change (with != the comparison yields NULL and
-- the change is silently ignored).
--
-- NOTE(review): rows whose cpt_codes is NULL never match any target row and
-- are inserted on every run; confirm whether quarantined rows should be
-- de-duplicated or routed elsewhere.
-- NOTE(review): assumes at most one source row and one current target row
-- per cpt_codes; duplicates would produce multiple matches/inserts.
MERGE INTO silver.cpt_codes AS target
USING (
  -- Pass 1: every incoming row, keyed by its CPT code.
  SELECT
    incoming.cpt_codes AS merge_key,
    incoming.procedure_code_category,
    incoming.cpt_codes,
    incoming.procedure_code_descriptions,
    incoming.code_status,
    incoming.is_quarantined
  FROM quality_checks AS incoming

  UNION ALL

  -- Pass 2: only the rows that differ from the current version, with a NULL
  -- key so they fall through to WHEN NOT MATCHED and become the new version.
  SELECT
    CAST(NULL AS STRING) AS merge_key,
    incoming.procedure_code_category,
    incoming.cpt_codes,
    incoming.procedure_code_descriptions,
    incoming.code_status,
    incoming.is_quarantined
  FROM quality_checks AS incoming
  JOIN silver.cpt_codes AS existing
    ON existing.cpt_codes = incoming.cpt_codes
   AND existing.is_current = TRUE
  WHERE NOT (existing.procedure_code_category <=> incoming.procedure_code_category)
     OR NOT (existing.procedure_code_descriptions <=> incoming.procedure_code_descriptions)
     OR NOT (existing.code_status <=> incoming.code_status)
) AS source
ON target.cpt_codes = source.merge_key AND target.is_current = TRUE

-- Expire the current version when any tracked attribute changed.
WHEN MATCHED AND (
  NOT (target.procedure_code_category <=> source.procedure_code_category) OR
  NOT (target.procedure_code_descriptions <=> source.procedure_code_descriptions) OR
  NOT (target.code_status <=> source.code_status)
)
THEN UPDATE SET
  target.is_current = FALSE,
  target.audit_modifieddate = current_timestamp()

-- New codes (pass 1) and new versions of changed codes (pass 2).
WHEN NOT MATCHED
THEN INSERT (
  procedure_code_category,
  cpt_codes,
  procedure_code_descriptions,
  code_status,
  is_quarantined,
  audit_insertdate,
  audit_modifieddate,
  is_current
)
VALUES (
  source.procedure_code_category,
  source.cpt_codes,
  source.procedure_code_descriptions,
  source.code_status,
  source.is_quarantined,
  current_timestamp(),
  current_timestamp(),
  TRUE
)