In [0]:
-- Seller dimension in the gold layer, stored as a Delta table.
-- Each row is one version of a seller; the start_date / end_date / is_current
-- columns carry the versioning metadata maintained by the MERGE load.
CREATE TABLE IF NOT EXISTS gold.dim_sellers (
  -- Surrogate key, assigned by Delta on insert (never supplied by the loader).
  seller_sk              BIGINT GENERATED ALWAYS AS IDENTITY,

  -- Business key and descriptive attributes copied from the source.
  seller_id              STRING,
  seller_zip_code_prefix STRING,
  seller_city            STRING,
  seller_state           STRING,

  -- Presumably the surrogate key of a geolocation dimension; confirm upstream.
  geolocation_sk         BIGINT,

  -- Row-version metadata: validity window and current-row flag.
  start_date             DATE,
  end_date               DATE,
  is_current             BOOLEAN
)
USING DELTA;



In [0]:
%python
from pyspark.sql.functions import current_date, lit

silver_df = spark.read.table("silver.sellers_cleaned") 

updates_df = (
    silver_df
    .withColumn("start_date", current_date())
    .withColumn("end_date", lit(None).cast("date"))
    .withColumn("is_current", lit(True))
)
updates_df.display()

In [0]:
%python
# Register the staged DataFrame as a temporary view so the SQL cells can
# reference it by name (`updates_view`). Re-running this cell replaces any
# existing view of the same name.
updates_df.createOrReplaceTempView("updates_view")

In [0]:
-- SCD Type 2 load of gold.dim_sellers from updates_view.
--
-- MERGE applies at most one action per source row, so a changed seller needs
-- two source rows: one that matches (to expire the current version) and one
-- that cannot match (to insert the new version). The staged source produces both:
--   * merge_key = seller_id : expires the changed current row, or inserts a
--                             seller that has no current row yet
--   * merge_key = NULL      : never satisfies the ON clause, so the new version
--                             of a changed seller is inserted in the same run
-- Without the second branch a changed seller would be left with no current row
-- until the next run of the notebook.
--
-- Change detection uses the null-safe operator (<=>) so that a transition to or
-- from NULL counts as a change instead of evaluating to NULL and being skipped.
--
-- NOTE(review): only seller_city and seller_state are tracked; a change to
-- seller_zip_code_prefix or geolocation_sk alone does not create a new version.
-- Confirm that is intended.
-- NOTE(review): assumes updates_view holds at most one row per seller_id;
-- duplicates would make several source rows match the same target row.
MERGE INTO gold.dim_sellers AS target
USING (
  -- Branch 1: every incoming row, keyed by its seller_id.
  SELECT incoming.seller_id AS merge_key, incoming.*
  FROM updates_view AS incoming

  UNION ALL

  -- Branch 2: a second copy of each changed seller with a NULL key, which
  -- always falls through to WHEN NOT MATCHED and becomes the new current row.
  SELECT NULL AS merge_key, incoming.*
  FROM updates_view AS incoming
  INNER JOIN gold.dim_sellers AS existing
    ON existing.seller_id = incoming.seller_id
   AND existing.is_current = true
  WHERE NOT (existing.seller_city <=> incoming.seller_city)
     OR NOT (existing.seller_state <=> incoming.seller_state)
) AS source
ON target.seller_id = source.merge_key AND target.is_current = true

-- Expire the current version when a tracked attribute changed.
WHEN MATCHED AND (
  NOT (target.seller_city <=> source.seller_city) OR
  NOT (target.seller_state <=> source.seller_state)
) THEN
  UPDATE SET
    end_date = current_date(),
    is_current = false

-- Insert brand-new sellers and the new version of changed sellers.
-- seller_sk is omitted because it is a GENERATED ALWAYS identity column.
WHEN NOT MATCHED THEN
  INSERT (
    seller_id,
    seller_zip_code_prefix,
    seller_city,
    seller_state,
    geolocation_sk,
    start_date,
    end_date,
    is_current
  )
  VALUES (
    source.seller_id,
    source.seller_zip_code_prefix,
    source.seller_city,
    source.seller_state,
    source.geolocation_sk,
    current_date(),
    NULL,
    true
  );




In [0]:
-- Spot-check: show up to ten rows of the seller dimension.
-- There is no ORDER BY, so which rows come back is not deterministic.
SELECT *
FROM gold.dim_sellers
LIMIT 10;