In [0]:
# Notebook parameter: name of the target environment; the widget defaults to "dev".
dbutils.widgets.text("env", "dev")
env = dbutils.widgets.get("env")

In [0]:
%run "/Workspace/Users/azuredataengineer44@gmail.com/databricks-traffic/Databricks Retail Notebooks/common/config_loader"

In [0]:
# Load the settings for the selected environment and pick out the catalog name.
# NOTE(review): `load_config` is presumably defined by the config_loader notebook
# pulled in via %run; confirm its return shape matches the keys read here.
config = load_config(env)
unity_catalog_settings = config["unity_catalog"]
catalog = unity_catalog_settings["catalog"]

# Read the raw product records from the bronze schema of that catalog.
bronze_table = f"{catalog}.bronze.products_raw"
df_bronze = spark.read.table(bronze_table)

In [0]:
from pyspark.sql.functions import (col ,current_date,lit)

# Columns taken as-is from the bronze table.
incoming_columns = [
    "product_id",
    "product_name",
    "category",
    "brand",
    "price",
    "currency",
    "is_active",
    "ingestion_ts",
    "source_file_path",
]

# Incoming rows are shaped as open, current versions: they start today,
# have no end date yet, and are flagged as current.
df_incoming = (
    df_bronze.select(*incoming_columns)
    .withColumn("effective_from", current_date())
    .withColumn("effective_to", lit(None).cast("date"))
    .withColumn("is_current", lit(True))
)

In [0]:
# Fully qualified name of the silver table that holds the product history.
silver_table = f"{catalog}.silver.products_scd2"

# DDL for the silver table; it is a no-op when the table already exists.
# NOTE(review): `price` is declared as integer -- confirm bronze prices never
# carry a fractional part, otherwise this column type will lose precision.
create_silver_ddl = f"""
          CREATE TABLE IF NOT EXISTS {silver_table} 
          (
            product_id string,
            product_name string,
            category string,
            brand string,
            price integer,
            currency string,
            is_active boolean,
            effective_from DATE,
            effective_to DATE,
            is_current BOOLEAN,
            ingestion_ts TIMESTAMP,
            source_file STRING
           )
            USING DELTA
            """

spark.sql(create_silver_ddl)

In [0]:
from delta.tables  import DeltaTable
from pyspark.sql.functions import current_date, lit

# Delta table handle for the silver table, looked up by name; used as the merge target.
silver_dt = DeltaTable.forName(spark, silver_table)

# Match an incoming row (alias `s`) to the currently active version (alias `t`)
# of the same product.
merge_condition = """
                   t.product_id = s.product_id
                   and t.is_current = true                   
                   """

# True when any tracked attribute differs between the active version and the
# incoming row. The null-safe operator `<=>` is used instead of `<>` because
# `<>` evaluates to NULL when either side is NULL, which would silently hide a
# change from NULL to a value (or from a value to NULL).
change_condition = ("""
                    NOT (t.product_name <=> s.product_name) OR
                    NOT (t.category <=> s.category) OR
                    NOT (t.brand <=> s.brand) OR
                    NOT (t.price <=> s.price) OR
                    NOT (t.currency <=> s.currency) OR
                    NOT (t.is_active <=> s.is_active)
                    """)

In [0]:
# SCD Type 2 upsert of `df_incoming` into the silver table.
#
# Within one MERGE a source row is either matched (and can close the active
# version) or not matched (and can be inserted) -- never both. Merging
# `df_incoming` directly therefore closes the old version of a changed product
# without inserting its new version, leaving the product with no current row.
# To do both in one atomic MERGE, every changed row is staged twice:
#   * once keyed by product_id -> matches and closes the active version;
#   * once with a NULL merge key -> never matches, so it is inserted as the
#     new current version.
# NOTE(review): assumes `df_incoming` holds at most one row per product_id;
# MERGE fails if several source rows match the same target row.

# Local import: `expr` turns the SQL merge condition into a join Column.
from pyspark.sql.functions import expr

# Incoming rows whose tracked attributes differ from the active silver version.
changed_rows = (
    df_incoming.alias("s")
    .join(silver_dt.toDF().alias("t"), expr(merge_condition), "inner")
    .where(change_condition)
    .selectExpr("NULL AS merge_key", "s.*")
)

# All incoming rows keyed by product_id, plus the NULL-keyed copies of changed rows.
staged_updates = (
    df_incoming.selectExpr("product_id AS merge_key", "*")
    .unionByName(changed_rows)
)

merge_builder = (
    silver_dt.alias("t")
    .merge(
        staged_updates.alias("s"),
        # Mirrors `merge_condition`, but joins on the staged merge key so that
        # NULL-keyed rows fall through to the insert branch.
        "t.product_id = s.merge_key AND t.is_current = true",
    )
    # Close the active version when the incoming attributes differ.
    .whenMatchedUpdate(
        condition=change_condition,
        set={
            "effective_to": current_date(),
            "is_current": lit(False),
        },
    )
    # Insert brand-new products and the new versions of changed products.
    .whenNotMatchedInsert(
        values={
            "product_id": "s.product_id",
            "product_name": "s.product_name",
            "category": "s.category",
            "brand": "s.brand",
            "price": "s.price",
            "currency": "s.currency",
            "is_active": "s.is_active",
            "ingestion_ts": "s.ingestion_ts",
            # The silver column is `source_file`; the incoming column is `source_file_path`.
            "source_file": "s.source_file_path",
            "effective_from": "s.effective_from",
            "effective_to": "s.effective_to",
            "is_current": "s.is_current",
        }
    )
)

merge_builder.execute()

In [0]:
# spark.sql("""select * from dev_catalog.silver.products_scd2""").display()