In [0]:
from pyspark.sql.functions import sha2, col, current_timestamp, monotonically_increasing_id

In [0]:
%sql

create schema if not exists rwd.silver;

In [0]:
%sql

select * from rwd.bronze.cancer_patient_raw;
     

Patient_ID,Name,Age,Gender,Cancer_Patient
P001,Asha Rao,52,Female,Yes
P002,Rajesh Kumar,61,Male,Yes
P003,Meena Iyer,47,Female,Yes
P004,Vikram Singh,68,Male,Yes
P005,Anita Sharma,55,Female,Yes
P006,Suresh Patel,59,Male,Yes
P007,Leela Nair,44,Female,Yes
P008,Arun Das,63,Male,Yes
P009,Kavita Joshi,50,Female,Yes
P010,Manoj Verma,57,Male,Yes


In [0]:
bronze_table = 'rwd.bronze.cancer_patient_raw'
silver_table = 'rwd.silver.cancer_patient'
checkpoint_path = "/Volumes/rwd/silver/my_volume/silver/cancer_patient/checkpoint/"

In [0]:
df_bronze = (
    spark.readStream.table(bronze_table)
)


In [0]:
df_silver_clean = (
    df_bronze
        .dropDuplicates(["Patient_ID"])
        .withColumn("load_timestamp", current_timestamp())
)


In [0]:
from delta.tables import DeltaTable

def merge_cancer_patient(batch_df, batch_id):
    if not spark.catalog.tableExists(silver_table):
        batch_df.write.format("delta").mode("overwrite").saveAsTable(silver_table)
        return

    # Load Delta table by name and upsert
    cancer_patient = DeltaTable.forName(spark, silver_table)

    (cancer_patient.alias("t")
        .merge(
            batch_df.alias("s"),
            "t.Patient_ID = s.Patient_ID"
        )
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute())



(
    df_silver_clean.writeStream
        .foreachBatch(merge_cancer_patient)
        .outputMode("update")
        .trigger(availableNow=True)
        .option("checkpointLocation", checkpoint_path)
        .start()
)

<pyspark.sql.connect.streaming.query.StreamingQuery at 0xffc52c2b4140>

In [0]:
%sql

select * from rwd.silver.cancer_patient;

Patient_ID,Name,Age,Gender,Cancer_Patient,load_timestamp
P009,Kavita Joshi,50,Female,Yes,2026-02-14T04:14:01.027Z
P005,Anita Sharma,55,Female,Yes,2026-02-14T04:14:01.027Z
P007,Leela Nair,44,Female,Yes,2026-02-14T04:14:01.027Z
P003,Meena Iyer,47,Female,Yes,2026-02-14T04:14:01.027Z
P006,Suresh Patel,59,Male,Yes,2026-02-14T04:14:01.027Z
P004,Vikram Singh,68,Male,Yes,2026-02-14T04:14:01.027Z
P002,Rajesh Kumar,61,Male,Yes,2026-02-14T04:14:01.027Z
P010,Manoj Verma,57,Male,Yes,2026-02-14T04:14:01.027Z
P001,Asha Rao,52,Female,Yes,2026-02-14T04:14:01.027Z
P008,Arun Das,63,Male,Yes,2026-02-14T04:14:01.027Z
