In [0]:
from delta.tables import DeltaTable
from pyspark.sql.functions import current_timestamp, row_number, col
from pyspark.sql.window import Window

# Bronze -> Silver load for orders.
#
# Reads the whole Bronze orders table, keeps the most recently ingested row
# per order_id, and then either creates the Silver table (first run) or
# merges the deduplicated rows into it (subsequent runs).
BRONZE_TABLE = "bronze_db.orders"
SILVER_TABLE = "silver_db.orders"

# Read Bronze Orders
bronze_df = spark.table(BRONZE_TABLE)

# Drop rows that have no business key. The merge predicate below uses plain
# equality ("t.order_id = s.order_id"), which is never true when either side
# is NULL, so a NULL-keyed row would fall through to the insert clause on
# every run and pile up as duplicates in Silver.
keyed_df = bronze_df.filter(col("order_id").isNotNull())

# Deduplicate Bronze (latest per order_id)
# NOTE(review): rows that tie on ingestion_ts are ranked arbitrarily by
# row_number(); add a tie-breaker column to the ordering if Bronze has one.
window_spec = Window.partitionBy("order_id").orderBy(col("ingestion_ts").desc())

dedup_df = (
    keyed_df
    .withColumn("rn", row_number().over(window_spec))
    .filter(col("rn") == 1)
    .drop("rn")  # helper rank column must not leak into Silver
)

# Initial load or incremental merge
if not spark.catalog.tableExists(SILVER_TABLE):
    # First run: Silver does not exist yet, so create it from the
    # deduplicated Bronze rows and stamp both audit columns.
    # NOTE(review): this path keeps Bronze's ingestion_ts as-is, whereas the
    # merge path below writes the ETL timestamp into ingestion_ts -- confirm
    # which meaning Silver's ingestion_ts is supposed to have.
    silver_df = (
        dedup_df
        .withColumn("created_ts", current_timestamp())
        .withColumn("modified_ts", current_timestamp())
    )

    silver_df.write.format("delta").saveAsTable(SILVER_TABLE)

else:
    target = DeltaTable.forName(spark, SILVER_TABLE)

    # One ETL timestamp column shared by every audit field written below.
    staged_df = dedup_df.withColumn("etl_ts", current_timestamp())

    (
        target.alias("t")
        .merge(
            staged_df.alias("s"),
            "t.order_id = s.order_id"
        )
        # Update only when status or amount actually changed. `<=>` is the
        # null-safe equality operator, so NULL vs NULL counts as "unchanged"
        # and NULL vs a value counts as "changed".
        # NOTE(review): customer_id and order_date are neither compared nor
        # updated for matched rows -- confirm they are meant to be immutable.
        .whenMatchedUpdate(
            condition="""
                NOT (
                    t.status <=> s.status AND
                    t.amount <=> s.amount
                )
            """,
            set={
                "status": "s.status",
                "amount": "s.amount",
                "ingestion_ts": "s.etl_ts",
                "modified_ts": "s.etl_ts"
            }
        )
        # New order_id: insert the listed columns; created_ts and modified_ts
        # both start at the ETL timestamp.
        .whenNotMatchedInsert(
            values={
                "order_id": "s.order_id",
                "customer_id": "s.customer_id",
                "order_date": "s.order_date",
                "amount": "s.amount",
                "status": "s.status",
                "ingestion_ts": "s.etl_ts",
                "created_ts": "s.etl_ts",
                "modified_ts": "s.etl_ts"
            }
        )
        .execute()
    )
