In [0]:
from pyspark.sql.functions import current_timestamp

def scd1_merge_bronze_to_silver_sql(bronze_table: str, silver_table: str, primary_keys: list):
    """Upsert a bronze table into its silver counterpart (SCD Type 1).

    The bronze rows get a fresh ``updated_at`` timestamp, lose their
    ``inserted_at`` column and are de-duplicated on ``primary_keys``. The
    result is registered as the temp view ``bronze_tmp``. If the silver table
    does not exist yet it is created from that snapshot; otherwise a ``MERGE``
    updates every matching row and inserts every non-matching one.

    Relies on the notebook-provided ``spark`` session and on
    ``current_timestamp`` being imported in the enclosing cell.

    Args:
        bronze_table: Name of the source table to read.
        silver_table: Name of the target table to create or merge into.
        primary_keys: Column names that identify a row. A single string is
            accepted and treated as a one-column key.

    Raises:
        ValueError: If ``primary_keys`` is empty or contains anything other
            than non-empty strings (an empty key list would otherwise produce
            a MERGE statement with no ``ON`` condition).
    """
    # Normalise the keys first so that bad input fails before any Spark work.
    if isinstance(primary_keys, str):
        key_columns = [primary_keys]
    else:
        key_columns = list(primary_keys or [])
    if not key_columns or not all(isinstance(key, str) and key for key in key_columns):
        raise ValueError(
            f"primary_keys must be a non-empty list of column names, got {primary_keys!r}"
        )

    # NOTE(review): which of several rows sharing a key survives dropDuplicates
    # is not controlled here - confirm that is acceptable for the bronze data.
    df_bronze = (
        spark.table(bronze_table)
        .withColumn("updated_at", current_timestamp())
        .drop("inserted_at")
        .dropDuplicates(subset=key_columns)
    )
    df_bronze.createOrReplaceTempView("bronze_tmp")

    if not spark.catalog.tableExists(silver_table):
        # First load: the de-duplicated snapshot becomes the silver table.
        df_bronze.write.format("delta").mode("overwrite").saveAsTable(silver_table)
        print(f"Created new silver table: {silver_table}")
        return

    # Backtick-quote key columns so names with special characters or reserved
    # words still parse; embedded backticks are escaped by doubling.
    quoted_keys = ["`" + key.replace("`", "``") + "`" for key in key_columns]
    pk_conditions = " AND ".join(f"target.{key} = source.{key}" for key in quoted_keys)
    merge_sql = f"""
        MERGE INTO {silver_table} AS target
        USING bronze_tmp AS source
        ON {pk_conditions}
        WHEN MATCHED THEN
          UPDATE SET *
        WHEN NOT MATCHED THEN
          INSERT *
    """
    spark.sql(merge_sql)
    print(f"SCD Type 1 merge completed: {bronze_table} -> {silver_table}")

def cleanup(table_name):
    """Best-effort removal of a table's warehouse files and its catalog entry.

    Deletes ``dbfs:/user/hive/warehouse/<table_name>`` recursively and then
    issues ``DROP TABLE <table_name>``. A failure in either step is reported
    on stdout and does not stop the other step or raise to the caller.

    Relies on the notebook-provided ``dbutils`` and ``spark`` objects.

    Args:
        table_name: Name of the table to remove; also used as the directory
            name under the Hive warehouse path.
    """
    table_path = f"/user/hive/warehouse/{table_name}"

    try:
        # Second argument enables recursive deletion of the directory.
        dbutils.fs.rm(f"dbfs:{table_path}", True)
    except Exception as exc:
        print(f"{table_name} files Not found ({exc})")

    try:
        spark.sql(f"DROP TABLE {table_name}")
    except Exception as exc:
        print(f"{table_name} Not found ({exc})")


In [0]:
# Bronze -> silver table pairs to merge, with the column(s) identifying a row
# in each. The dict keys deliberately mirror the parameter names of
# scd1_merge_bronze_to_silver_sql.
scd1_config = [
    {"bronze_table": "media_customer_reviews_bronze", "silver_table": "media_customer_reviews_silver", "primary_keys": ["new_id"]},
    {"bronze_table": "media_gold_reviews_chunked_bronze", "silver_table": "media_gold_reviews_chunked_silver", "primary_keys": ["franchiseID", "chunk_id"]},
    {"bronze_table": "sales_customers_bronze", "silver_table": "sales_customers_silver", "primary_keys": ["customerID"]},
    {"bronze_table": "sales_franchises_bronze", "silver_table": "sales_franchises_silver", "primary_keys": ["franchiseID"]},
    {"bronze_table": "sales_suppliers_bronze", "silver_table": "sales_suppliers_silver", "primary_keys": ["supplierID"]},
    {"bronze_table": "sales_transactions_bronze", "silver_table": "sales_transactions_silver", "primary_keys": ["transactionID"]},
]

# Run the merges one after another, in the order listed above.
for config in scd1_config:
    scd1_merge_bronze_to_silver_sql(**config)

SCD Type 1 merge completed: media_customer_reviews_bronze -> media_customer_reviews_silver
SCD Type 1 merge completed: media_gold_reviews_chunked_bronze -> media_gold_reviews_chunked_silver
SCD Type 1 merge completed: sales_customers_bronze -> sales_customers_silver
SCD Type 1 merge completed: sales_franchises_bronze -> sales_franchises_silver
SCD Type 1 merge completed: sales_suppliers_bronze -> sales_suppliers_silver
SCD Type 1 merge completed: sales_transactions_bronze -> sales_transactions_silver
