### Importing the modules

In [0]:
from pyspark.sql.functions import *
from delta.tables import DeltaTable

### Reading data from the silver layer

In [0]:
# Load the silver-layer employees table as a DataFrame.
SILVER_EMPLOYEES_TABLE = "retail_analytics.silver.employees"
emp_silver_df = spark.table(SILVER_EMPLOYEES_TABLE)

### Selecting the needed columns and adding metadata

In [0]:
# Keep only the dimension attributes from the silver frame and stamp both
# audit columns with the current timestamp.
load_ts = current_timestamp()

dim_emp_df = (
    emp_silver_df
    .select("employee_id", "store_id", "name", "position")
    .withColumn("_created_at", load_ts)
    .withColumn("_updated_at", load_ts)
)

### Creating the gold table

In [0]:
# DDL for the gold employee dimension.
# - IF NOT EXISTS: the statement does nothing when the table is already there.
# - employee_sk: identity column, always generated by the table itself.
DIM_EMPLOYEES_DDL = """
CREATE TABLE IF NOT EXISTS retail_analytics.gold.dim_employees (
    employee_sk BIGINT GENERATED ALWAYS AS IDENTITY,
    employee_id INT,
    store_id INT,
    name STRING,
    position STRING,
    _created_at TIMESTAMP,
    _updated_at TIMESTAMP
)
USING DELTA
"""

spark.sql(DIM_EMPLOYEES_DDL)

DataFrame[]

### Merge process (SCD Type 1)

In [0]:
# SCD Type 1 upsert of the employee dimension, keyed on the business key
# employee_id: matched rows are overwritten in place, unmatched source rows
# are inserted.
dim_emp_tbl = DeltaTable.forName(spark, "retail_analytics.gold.dim_employees")

# Only touch a matched row when at least one tracked attribute really changed.
# Without this guard every matched row is rewritten on every run and
# _updated_at is bumped even when nothing changed, which makes the column
# useless as a "last modified" marker.
# `<=>` is Spark SQL's null-safe equality, so NULL -> value (and value -> NULL)
# transitions are detected as changes while NULL -> NULL is not.
attributes_changed = (
    "NOT (tgt.store_id <=> src.store_id) "
    "OR NOT (tgt.name <=> src.name) "
    "OR NOT (tgt.position <=> src.position)"
)

# NOTE(review): the merge assumes employee_id is unique in the source frame —
# confirm against the silver table.
(
    dim_emp_tbl.alias("tgt")
    .merge(
        dim_emp_df.alias("src"),
        "tgt.employee_id = src.employee_id"
    )
    .whenMatchedUpdate(
        condition=attributes_changed,
        set={
            "store_id": "src.store_id",
            "name": "src.name",
            "position": "src.position",
            # _created_at is deliberately left untouched on update.
            "_updated_at": "current_timestamp()"
        }
    )
    # employee_sk is not listed: it is a GENERATED ALWAYS identity column.
    .whenNotMatchedInsert(values={
        "employee_id": "src.employee_id",
        "store_id": "src.store_id",
        "name": "src.name",
        "position": "src.position",
        "_created_at": "current_timestamp()",
        "_updated_at": "current_timestamp()"
    })
    .execute()
)

DataFrame[num_affected_rows: bigint, num_updated_rows: bigint, num_deleted_rows: bigint, num_inserted_rows: bigint]

In [0]:
# Preview up to five rows of the gold dimension.
dim_emp_preview_df = spark.read.table("retail_analytics.gold.dim_employees").limit(5)
dim_emp_preview_df.display()

employee_sk,employee_id,store_id,name,position,_created_at,_updated_at
1,94,8,杨凤英,Sales Associate,2026-01-20T06:59:02.080Z,2026-01-20T06:59:02.080Z
2,29,3,Cynthia Serrano,Sales Associate,2026-01-20T06:59:02.080Z,2026-01-20T06:59:02.080Z
3,362,32,Soraia Batista-maia,Assistant Manager,2026-01-20T06:59:02.080Z,2026-01-20T06:59:02.080Z
4,284,25,Lucie Dijoux,Stock Clerk,2026-01-20T06:59:02.080Z,2026-01-20T06:59:02.080Z
5,88,8,胡秀云,Cashier,2026-01-20T06:59:02.080Z,2026-01-20T06:59:02.080Z


In [0]:
# Total number of rows currently in the gold dimension.
dim_emp_row_count = spark.read.table("retail_analytics.gold.dim_employees").count()
dim_emp_row_count

404