In [0]:
# Notebook parameter: the target environment, exposed as a text widget that
# defaults to "dev" and read back into `env` for the cells below.
dbutils.widgets.text("env", "dev")
env = dbutils.widgets.get("env")

In [0]:
%run "/Workspace/Users/azuredataengineer44@gmail.com/databricks-traffic/Databricks Retail Notebooks/common/config_loader"

In [0]:
# Load the settings for the environment chosen in the `env` widget.
# NOTE(review): `load_config` is not defined in this notebook; presumably it is
# provided by the config_loader notebook executed via %run — confirm.
config = load_config(env)

In [0]:
# Unity Catalog name for this environment, taken from the loaded configuration.
catalog = config["unity_catalog"]["catalog"]

# Fully qualified name of the raw customers table in the bronze schema.
bronze_table = f"{catalog}.bronze.customers_raw"

# Source DataFrame for the rest of the notebook.
df_bronze = spark.read.table(bronze_table)


In [0]:
from pyspark.sql.functions import col,current_date,lit

# Columns carried over from bronze as-is (customer attributes plus lineage fields).
bronze_columns = [
    "customer_id",
    "first_name",
    "last_name",
    "email",
    "phone",
    "city",
    "state",
    "country",
    "customer_status",
    "signup_date",
    "ingestion_ts",
    "source_file_path",
]

# Every incoming row is shaped as an open SCD2 version: it becomes effective
# today, has no end date yet, and is flagged as the current record.
df_incoming = df_bronze.select(
    *bronze_columns,
    current_date().alias("effective_from"),
    lit(None).cast("date").alias("effective_to"),
    lit(True).alias("is_current"),
)

In [0]:
# Fully qualified name of the SCD Type 2 customers table in the silver schema.
silver_table = f"{catalog}.silver.customers_scd2"

# DDL for the target Delta table. IF NOT EXISTS makes this cell safe to re-run:
# the table is only created the first time the notebook runs in an environment.
create_silver_table_sql = f"""
          CREATE TABLE IF NOT EXISTS {silver_table}
          (
            customer_id string,
            first_name string,
            last_name string,
            email string,
            phone string,
            city string,
            state string,
            country string,
            customer_status string,
            signup_date DATE,
  effective_from DATE,
  effective_to DATE,
  is_current BOOLEAN,
  ingestion_ts TIMESTAMP,
  source_file STRING
        )
        USING DELTA
                """

spark.sql(create_silver_table_sql)

In [0]:
from delta.tables import DeltaTable
from pyspark.sql import Window
from pyspark.sql.functions import col, current_date, lit, row_number

# SCD Type 2 upsert of the incoming customers into the silver table.
#
# For every customer in the batch:
#   * unknown customer            -> insert as the current version
#   * known, attributes unchanged -> nothing happens
#   * known, attributes changed   -> close the current version (effective_to = today,
#                                    is_current = false) AND insert the new version
#
# A single MERGE cannot both update and insert for the same source row, so the
# batch is "staged": each changed customer appears twice, once keyed by its
# customer_id (matches and closes the open version) and once with a NULL merge
# key (matches nothing, so it is inserted as the new open version).

silver_dt = DeltaTable.forName(spark, silver_table)

# Attributes whose change opens a new version of the customer.
tracked_columns = [
    "first_name",
    "last_name",
    "email",
    "phone",
    "city",
    "state",
    "country",
    "customer_status",
]

# Only the open version of a customer may be matched; expired history rows must
# never be touched again.
merge_condition = "t.customer_id = s.merge_key AND t.is_current = true"

# `<=>` is null-safe equality: unlike `<>`, it also detects a value changing
# to or from NULL. Building the predicate with join() avoids a dangling OR.
change_condition = " OR ".join(
    f"NOT (t.{name} <=> s.{name})" for name in tracked_columns
)

# MERGE fails (or inserts duplicate current rows) when several source rows
# target the same customer, so keep only the most recently ingested row each.
latest_first = Window.partitionBy("customer_id").orderBy(col("ingestion_ts").desc())
df_latest = (
    df_incoming
    .withColumn("_row_rank", row_number().over(latest_first))
    .filter(col("_row_rank") == 1)
    .drop("_row_rank")
)

# Incoming rows whose tracked attributes differ from the customer's open version.
current_versions = silver_dt.toDF().filter("is_current = true")
changed_customers = (
    df_latest.alias("s")
    .join(current_versions.alias("t"), col("s.customer_id") == col("t.customer_id"))
    .where(change_condition)
    .select("s.*")
)

# Staged source: every row keyed by customer_id, plus a NULL-keyed copy of each
# changed row that will fall through to the insert branch.
key_type = df_latest.schema["customer_id"].dataType
staged_updates = (
    df_latest.withColumn("merge_key", col("customer_id"))
    .unionByName(
        changed_customers.withColumn("merge_key", lit(None).cast(key_type))
    )
)

merge_builder = (
    silver_dt.alias("t")
    .merge(staged_updates.alias("s"), merge_condition)
    # Close the open version when any tracked attribute changed.
    .whenMatchedUpdate(
        condition=change_condition,
        set={
            "effective_to": current_date(),
            "is_current": lit(False),
        },
    )
    # New customers and the NULL-keyed copies of changed customers land here.
    .whenNotMatchedInsert(
        values={
            "customer_id": "s.customer_id",
            "first_name": "s.first_name",
            "last_name": "s.last_name",
            "email": "s.email",
            "phone": "s.phone",
            "city": "s.city",
            "state": "s.state",
            "country": "s.country",
            "customer_status": "s.customer_status",
            "signup_date": "s.signup_date",
            "effective_from": "s.effective_from",
            "effective_to": "s.effective_to",
            "is_current": "s.is_current",
            "ingestion_ts": "s.ingestion_ts",
            # The bronze column is source_file_path; silver stores it as source_file.
            "source_file": "s.source_file_path",
        }
    )
)

merge_builder.execute()

In [0]:
# Preview the SCD2 table that the merge wrote to. The name comes from
# `silver_table` (built from the configured catalog) so the check follows the
# `env` widget instead of always querying the dev catalog.
spark.sql(f"select * from {silver_table}").display()

In [0]:
# Render the incoming batch so it can be compared against the silver table.
display(df_incoming)