In [0]:
import sys

# Make the repository root (two levels up from this notebook's working directory)
# importable so that the `src_staging` package resolves.
# Guarded so that re-running this cell does not keep appending duplicate entries.
_PROJECT_ROOT = "../../"
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)

In [0]:
from pyspark.sql.types import *
from pyspark.sql.functions import *

from src_staging.sv_agent_stage import create_agent_stage, sat_agent_details_schema
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number, col, current_timestamp

In [0]:
# Run parameters.
# partition_name: name of the partition field handed to the staging helper.
# partition_date: partition to load, written as a compact YYYYMMDD string.
partition_name = "partition_date"
partition_date = "20250828"

In [0]:
# Fully qualified source table and target satellite table.
_bz_table = "ctl_central_published.sc_bz_source_name.dt_customer_journey_daily"
_sat_sv_table = "ctl_central_published.sc_sv_raw_vault.sat_agent_details"

# Re-shape the compact YYYYMMDD partition value into YYYY-MM-DD.
partition_date_fmt = "-".join(
    (partition_date[:4], partition_date[4:6], partition_date[6:])
)

In [0]:
# Build the agent staging DataFrame from the source table for the selected partition.
# NOTE(review): create_agent_stage lives in src_staging.sv_agent_stage; presumably it
# filters `_bz_table` on `partition_name` == `partition_date_fmt` — confirm in the helper.
sv_df = create_agent_stage(_bz_table, partition_name, partition_date_fmt)
# Render the staged rows for visual inspection.
sv_df.display()

In [0]:
# Incoming snapshot from the SV agent staging DataFrame. It is reduced below to one
# row per license_number, which is the key the satellite MERGE joins on.
incoming = (sv_df
    .select(
        col("agent_id"),
        col("agent_name"),
        col("region"),
        col("license_number"),
        # record_source must be selected exactly once: a duplicated column name makes
        # `src.record_source` an ambiguous reference when the temp view is queried.
        col("record_source"),
        col("event_ts"),
        col("partition_date")
    )
    .withColumn(
        "hashdiff",
        # Change-detection hash over the descriptive attributes.
        # NOTE(review): concat_ws skips NULL inputs, so (value, NULL) and (NULL, value)
        # produce the same hash. Coalescing NULLs to '' would remove that collision but
        # would also change hashes already stored in the satellite — decide deliberately.
        sha2(concat_ws("||",
            (col("agent_name")),
            (col("region"))
        ), 256)
    )
    # Load time of this run, used as the start of validity for new versions.
    .withColumn("effective_from_ts", current_timestamp())
)

# Keep only the most recent event per license_number.
# NOTE(review): rows that tie on event_ts are ordered arbitrarily; add a secondary
# sort column if a deterministic pick is required.
w = Window.partitionBy("license_number").orderBy(col("event_ts").desc())

incoming_dedup = (incoming
    .withColumn("rn", row_number().over(w))
    .filter("rn = 1")
    .drop("rn")
)

# Expose the deduplicated snapshot to Spark SQL for the MERGE statement.
incoming_dedup.createOrReplaceTempView("incoming_agent")

In [0]:
# First run only: create the satellite as an empty Delta table so MERGE has a target.
if not spark.catalog.tableExists(_sat_sv_table):
    (
        spark.createDataFrame([], sat_agent_details_schema())
        .write.format("delta")
        .saveAsTable(_sat_sv_table)
    )

# SCD Type 2 load of the agent satellite.
#
# A single MERGE row can either update or insert, never both. If the source were just
# `incoming_agent`, a changed agent would match its current row, that row would be
# closed, and the new version would NOT be inserted in the same run — leaving the key
# with no current row. To do both in one statement the source is staged as two sets:
#   1. every incoming row keyed by its license_number
#      -> closes the current row when the hashdiff changed, or inserts a brand-new key;
#   2. incoming rows whose hashdiff differs from the current target row, keyed by NULL
#      -> can never match, so they fall into WHEN NOT MATCHED and insert the new version.
# LEFT SEMI JOIN is used for set 2 so a source row is emitted at most once.
spark.sql(f"""
    MERGE INTO {_sat_sv_table} AS tgt
    USING (
        SELECT inc.license_number AS merge_key, inc.*
        FROM incoming_agent AS inc

        UNION ALL

        SELECT NULL AS merge_key, inc.*
        FROM incoming_agent AS inc
        LEFT SEMI JOIN {_sat_sv_table} AS cur
            ON  cur.license_number = inc.license_number
            AND cur.is_current = true
            AND cur.hashdiff <> inc.hashdiff
    ) AS src
    ON  tgt.license_number = src.merge_key
    AND tgt.is_current = true

    WHEN MATCHED AND tgt.hashdiff <> src.hashdiff THEN
    UPDATE SET
        tgt.effective_to_ts = src.effective_from_ts,
        tgt.is_current      = false

    WHEN NOT MATCHED THEN
    INSERT (
        license_number, agent_name, region, agent_id,
        hashdiff, effective_from_ts, effective_to_ts, is_current,
        record_source, _ingest_ts, partition_date
    )
    VALUES (
        src.license_number, src.agent_name, src.region, src.agent_id,
        src.hashdiff, src.effective_from_ts, NULL, true,
        src.record_source, current_timestamp(), src.partition_date
    )
""")