### Importing the modules

In [0]:
from pyspark.sql.functions import *
from delta.tables import DeltaTable

### Reading data from the silver layer

In [0]:
# Load the discounts table from the silver layer as the source for the gold dimension.
silver_discounts_table = "retail_analytics.silver.discounts"
disc_silver_df = spark.table(silver_discounts_table)

### Selecting the needed columns and adding metadata

In [0]:
# Silver column name -> gold dimension column name (insertion order is the
# output column order).
column_renames = {
    "Start": "start_date",
    "End": "end_date",
    "Discount": "discount",
    "Description": "description",
    "Category": "category",
    "Sub_Category": "sub_category",
}
renamed_cols = [
    col(silver_name).alias(gold_name)
    for silver_name, gold_name in column_renames.items()
]

# Keep only the renamed columns, then stamp both metadata columns with the
# current timestamp.
load_ts = current_timestamp()
dim_disc_df = (
    disc_silver_df
    .select(*renamed_cols)
    .withColumn("_created_at", load_ts)
    .withColumn("_updated_at", load_ts)
)

### Creating the gold table

In [0]:
# DDL for the gold discount dimension. IF NOT EXISTS makes this cell safe to
# re-run; discount_sk is an identity column, so it is never supplied by the
# loader.
create_dim_discounts_ddl = """
CREATE TABLE IF NOT EXISTS retail_analytics.gold.dim_discounts (
    discount_sk BIGINT GENERATED ALWAYS AS IDENTITY,
    start_date DATE,
    end_date DATE,
    discount DOUBLE,
    description STRING,
    category STRING,
    sub_category STRING,
    _created_at TIMESTAMP,
    _updated_at TIMESTAMP
)
USING DELTA
"""

spark.sql(create_dim_discounts_ddl)

DataFrame[]

### Merge process (SCD-1)

In [0]:
# SCD Type 1 upsert of the silver discounts into the gold dimension.
#
# Business key: validity window + rate + the category / sub-category the
# discount applies to.
# NOTE(review): the key assumes one promotion (same window and rate) can cover
# several category / sub-category pairs -- confirm against the silver table.
# If that holds, matching on window + rate alone lets several source rows hit
# the same target row, which Delta rejects on any re-run of this cell.
dim_disc_tbl = DeltaTable.forName(spark, "retail_analytics.gold.dim_discounts")

# `<=>` is null-safe equality: with plain `=`, a row whose key column is NULL
# never matches itself and would be inserted again on every run.
merge_condition = """
    tgt.start_date   <=> src.start_date AND
    tgt.end_date     <=> src.end_date AND
    tgt.discount     <=> src.discount AND
    tgt.category     <=> src.category AND
    tgt.sub_category <=> src.sub_category
"""

(
    dim_disc_tbl.alias("tgt")
    .merge(dim_disc_df.alias("src"), merge_condition)
    # Overwrite in place (SCD-1), but only when the tracked attribute really
    # changed, so _updated_at is not bumped on every run.
    .whenMatchedUpdate(
        condition="NOT (tgt.description <=> src.description)",
        set={
            "description": "src.description",
            "_updated_at": "current_timestamp()"
        }
    )
    # discount_sk is omitted on purpose: it is GENERATED ALWAYS AS IDENTITY.
    .whenNotMatchedInsert(values={
        "start_date": "src.start_date",
        "end_date": "src.end_date",
        "discount": "src.discount",
        "description": "src.description",
        "category": "src.category",
        "sub_category": "src.sub_category",
        "_created_at": "current_timestamp()",
        "_updated_at": "current_timestamp()"
    })
    .execute()
)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Preview five rows of the gold dimension after the merge.
dim_discounts_preview_df = spark.table("retail_analytics.gold.dim_discounts").limit(5)
dim_discounts_preview_df.display()

discount_sk,start_date,end_date,discount,description,category,sub_category,_created_at,_updated_at
1,2023-10-01,2023-10-10,0.2,20% discount during our Autumn Essentials Sale,Feminine,Sweaters and Knitwear,2026-01-20T06:48:51.628Z,2026-01-20T06:48:51.628Z
2,2020-10-01,2020-10-10,0.2,20% discount during our Autumn Essentials Sale,Feminine,Sportswear,2026-01-20T06:48:51.628Z,2026-01-20T06:48:51.628Z
3,2024-03-15,2024-03-31,0.35,35% discount during our Early Spring Collection Refresh,Feminine,Dresses and Jumpsuits,2026-01-20T06:48:51.628Z,2026-01-20T06:48:51.628Z
4,2024-10-01,2024-10-10,0.2,20% discount during our Autumn Essentials Sale,Children,Sweaters,2026-01-20T06:48:51.628Z,2026-01-20T06:48:51.628Z
5,2021-03-15,2021-03-31,0.35,35% discount during our Early Spring Collection Refresh,Feminine,Dresses and Jumpsuits,2026-01-20T06:48:51.628Z,2026-01-20T06:48:51.628Z


In [0]:
# Row count of the gold dimension after the merge.
dim_discounts_gold_df = spark.table("retail_analytics.gold.dim_discounts")
dim_discounts_gold_df.count()

181