In [0]:
import sys
sys.path.append("../../")

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

from src_staging.sv_customer_stage import create_customer_stage, sat_customer_info_schema

In [0]:
# Run parameters for this load.
# partition_name is handed to the staging helper together with the date
# (presumably the partition column of the source table -- confirm in the helper).
partition_name = "partition_date"
# Partition to process, written as a compact YYYYMMDD string.
partition_date = "20250828"

In [0]:
# Source table passed to the staging helper.
_bz_table = "ctl_central_published.sc_bz_source_name.dt_customer_journey_daily"
# Satellite table that the MERGE at the end of the notebook writes to.
_sat_sv_table = "ctl_central_published.sc_sv_raw_vault.sat_customer_personinfo"

# Re-shape the compact YYYYMMDD value into YYYY-MM-DD by slicing out the
# year, month and day parts and joining them with dashes.
partition_date_fmt = "-".join(
    (partition_date[:4], partition_date[4:6], partition_date[6:])
)

In [0]:
# Build the customer staging DataFrame from the source table for the selected
# partition. create_customer_stage is imported from
# src_staging.sv_customer_stage; its transformations are not visible in this
# notebook.
sv_df = create_customer_stage(_bz_table, partition_name, partition_date_fmt)
# NOTE(review): the columns read from sv_df further down include full_name,
# dob, email and phone, so the rendered output may contain personal data --
# clear it before sharing or committing the notebook.
sv_df.display()

In [0]:
# Incoming snapshot taken from the SV customer staging frame
# (expected to hold one row per customer_id).

# Columns carried over unchanged from the staging frame, in output order.
carried_columns = [
    "customer_id",
    "full_name",
    "dob",
    "email",
    "phone",
    "city",
    "record_source",
    "partition_date",
]

# Descriptive attributes that feed the change-detection hash, in hash order.
# NOTE(review): concat_ws skips NULL inputs, so two rows whose values sit in
# different attributes can produce the same joined string; changing this would
# alter hashdiff values already stored in the satellite, so it is left as is.
hash_inputs = [
    col("full_name"),
    col("dob").cast("string"),
    col("email"),
    col("phone"),
    col("city"),
]

incoming = sv_df.select(
    *carried_columns,
    # SHA-256 over the "||"-joined attribute values.
    sha2(concat_ws("||", *hash_inputs), 256).alias("hashdiff"),
    # Start of validity for the version described by this row.
    current_timestamp().alias("effective_from_ts"),
)

# Expose the snapshot to Spark SQL under the name used by the MERGE statement.
incoming.createOrReplaceTempView("incoming_customer")

In [0]:
# Bootstrap: on the first run create an empty Delta table with the satellite
# schema so the MERGE below always has a target.
if not spark.catalog.tableExists(_sat_sv_table):
    # Bind the schema to its own name. Rebinding `sat_customer_info_schema`
    # would shadow the imported factory, and this branch would then raise a
    # TypeError if it ran again in the same session (e.g. a retry after a
    # failed write).
    sat_schema = sat_customer_info_schema()
    (
        spark.createDataFrame([], sat_schema)
        .write
        .format("delta")
        .saveAsTable(_sat_sv_table)
    )

# SCD type 2 load of the satellite.
#
# A single MERGE row can either update its match or be inserted, never both.
# With a plain customer_id join, a changed customer only closes its current
# version and the new version is never written. The source is therefore
# staged as a union:
#   * merge_key = customer_id -> closes the current version when the hashdiff
#     changed, or inserts a customer that has no current version yet;
#   * merge_key = NULL        -> can never match, so it always reaches the
#     INSERT branch; emitted only for customers whose current hashdiff differs,
#     which opens their new version.
# Unchanged customers match without satisfying the UPDATE condition and are
# left untouched, so re-running the cell on the same snapshot is a no-op.
# Assumes incoming_customer holds at most one row per customer_id; otherwise
# the MERGE can fail on multiple source rows matching one target row.
spark.sql(f"""
    MERGE INTO {_sat_sv_table} AS tgt
    USING (
        SELECT inc.customer_id AS merge_key, inc.*
        FROM incoming_customer AS inc

        UNION ALL

        SELECT NULL AS merge_key, inc.*
        FROM incoming_customer AS inc
        JOIN {_sat_sv_table} AS cur
          ON  cur.customer_id = inc.customer_id
          AND cur.is_current  = true
          AND cur.hashdiff   <> inc.hashdiff
    ) AS src
    ON  tgt.customer_id = src.merge_key
    AND tgt.is_current = true

    WHEN MATCHED AND tgt.hashdiff <> src.hashdiff THEN
    UPDATE SET
        tgt.effective_to_ts = src.effective_from_ts,
        tgt.is_current      = false

    WHEN NOT MATCHED THEN
    INSERT (customer_id, full_name, dob, email, phone, city,
            hashdiff, effective_from_ts, effective_to_ts, is_current,
            record_source, _ingest_ts, partition_date)
    VALUES (src.customer_id, src.full_name, src.dob, src.email, src.phone, src.city,
            src.hashdiff, src.effective_from_ts, NULL, true,
            src.record_source, current_timestamp(), src.partition_date)
""")