### Importing the modules

In [0]:
from pyspark.sql.functions import *
from delta.tables import DeltaTable

### Reading the data from the silver layer

In [0]:
# Load the store records from the silver layer; this DataFrame is the input
# for the store dimension built below.
SILVER_STORES_TABLE = "retail_analytics.silver.stores"
store_silver_df = spark.read.table(SILVER_STORES_TABLE)

### Selecting the needed columns and adding metadata

In [0]:
# Keep only the store attributes that belong in the dimension, in their
# original order, and append two load-time audit columns.
store_attribute_columns = [
    "store_id",
    "country",
    "city",
    "store_name",
    "number_of_employees",
    "zip_code",
    "latitude",
    "longitude",
]

dim_store_df = store_silver_df.select(
    *store_attribute_columns,
    # Both audit columns carry the timestamp of the query that materialises
    # this DataFrame.
    current_timestamp().alias("_created_at"),
    current_timestamp().alias("_updated_at"),
)

### Creating the gold table

In [0]:
# DDL for the gold store dimension. IF NOT EXISTS makes the statement safe to
# re-run; store_sk is declared as an identity column, so the table generates
# it and the load below never supplies a value for it.
create_dim_stores_ddl = """
CREATE TABLE IF NOT EXISTS retail_analytics.gold.dim_stores (
    store_sk BIGINT GENERATED ALWAYS AS IDENTITY,
    store_id INT,
    country STRING,
    city STRING,
    store_name STRING,
    number_of_employees INT,
    zip_code STRING,
    latitude DOUBLE,
    longitude DOUBLE,
    _created_at TIMESTAMP,
    _updated_at TIMESTAMP
)
USING DELTA
"""

spark.sql(create_dim_stores_ddl)

DataFrame[]

### Merge process (SCD Type 1)

In [0]:
# SCD Type 1 upsert of the store dimension, keyed on the business key
# store_id:
#   * a store already present in the target has its attributes overwritten in
#     place, but only when at least one attribute actually differs;
#   * a store not yet present is inserted (store_sk is generated by the table).
dim_store_tbl = DeltaTable.forName(spark, "retail_analytics.gold.dim_stores")

# Descriptive attributes that are overwritten in place. Both the UPDATE and the
# INSERT mappings are derived from this single list so they cannot drift apart.
scd1_attribute_columns = [
    "country",
    "city",
    "store_name",
    "number_of_employees",
    "zip_code",
    "latitude",
    "longitude",
]

attribute_assignments = {name: f"src.{name}" for name in scd1_attribute_columns}

# Change detection: without this condition every matched row would be
# rewritten and its _updated_at bumped on every run, even if nothing changed.
# `<=>` is Spark SQL's null-safe equality, so NULL -> value transitions (and
# the reverse) count as changes while NULL vs NULL does not.
attributes_changed = " OR ".join(
    f"NOT (tgt.{name} <=> src.{name})" for name in scd1_attribute_columns
)

(
    dim_store_tbl.alias("tgt")
    .merge(
        dim_store_df.alias("src"),
        "tgt.store_id = src.store_id"
    )
    .whenMatchedUpdate(
        condition=attributes_changed,
        set={
            **attribute_assignments,
            # _created_at is deliberately left untouched on update.
            "_updated_at": "current_timestamp()",
        },
    )
    .whenNotMatchedInsert(values={
        "store_id": "src.store_id",
        **attribute_assignments,
        "_created_at": "current_timestamp()",
        "_updated_at": "current_timestamp()",
    })
    .execute()
)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Preview a few rows of the gold dimension to check the result of the merge.
dim_stores_preview = spark.read.table("retail_analytics.gold.dim_stores").limit(5)
dim_stores_preview.display()

store_sk,store_id,country,city,store_name,number_of_employees,zip_code,latitude,longitude,_created_at,_updated_at
1,29,España,Sevilla,Store Sevilla,8,41001,37.3906,-5.9879,2026-01-20T09:16:41.035Z,2026-01-20T09:16:41.035Z
2,24,France,Toulouse,Store Toulouse,8,31000,43.6047,1.4442,2026-01-20T09:16:41.035Z,2026-01-20T09:16:41.035Z
3,15,Deutschland,Frankfurt am Main,Store Frankfurt am Main,10,60311,50.1145,8.6785,2026-01-20T09:16:41.035Z,2026-01-20T09:16:41.035Z
4,20,United Kingdom,Bristol,Store Bristol,7,BS1 1AA,51.4545,-2.5879,2026-01-20T09:16:41.035Z,2026-01-20T09:16:41.035Z
5,21,France,Paris,Store Paris,7,75000,48.8656,2.343,2026-01-20T09:16:41.035Z,2026-01-20T09:16:41.035Z


In [0]:
# Number of rows currently in the gold store dimension.
dim_stores_row_count = spark.read.table("retail_analytics.gold.dim_stores").count()
dim_stores_row_count

35