In [0]:
from pyspark.sql.types import*
from pyspark.sql.functions import*

# Storage account that hosts both the Bronze and Silver containers. It is named
# once here so the account-key setting and the two table paths cannot drift apart.
storage_account = "hospitaldatastorag"
storage_host = f"{storage_account}.dfs.core.windows.net"

# Register the storage account key for this Spark session. The key is fetched
# from a Databricks secret scope, so it never appears in the notebook source.
spark.conf.set(
    f"fs.azure.account.key.{storage_host}",
    dbutils.secrets.get(scope="hospitalanalyticsvaultscope", key="storageacct"),
)

# Locations of the patient-flow data in the Bronze (input) and Silver (output) containers.
bronze_path = f"abfss://bronze@{storage_host}/patient_flow"
silver_path = f"abfss://silver@{storage_host}/patient_flow"

# Open the Bronze layer as a streaming source (Delta format).
bronze_df = (
    spark.readStream
    .format("delta")
    .load(bronze_path)
)

# Define Schema
# (column name, Spark type) pairs for the patient record, in output column order.
# Every field is nullable (the StructField default).
patient_fields = [
    ("patient_id", StringType()),
    ("gender", StringType()),
    ("age", IntegerType()),
    ("department", StringType()),
    ("admission_time", StringType()),
    ("discharge_time", StringType()),
    ("bed_id", IntegerType()),
    ("hospital_id", IntegerType()),
]
schema = StructType(
    [StructField(field_name, field_type) for field_name, field_type in patient_fields]
)

# Parse the raw JSON string into a struct using the declared schema, then
# flatten the struct so each field becomes a top-level column.
patient_struct = from_json(col("raw_json"), schema)
parsed_df = (
    bronze_df
    .withColumn("data", patient_struct)
    .select("data.*")
)

# Convert both time columns from strings to timestamps.
cleaned_df = (
    parsed_df
    .withColumn("admission_time", to_timestamp("admission_time"))
    .withColumn("discharge_time", to_timestamp("discharge_time"))
)

# Handling invalid age and Admission time :
# A missing or future admission timestamp is replaced with the processing time.
admission_ts = col("admission_time")
admission_is_invalid = admission_ts.isNull() | (admission_ts > current_timestamp())
cleaned_df = cleaned_df.withColumn(
    "admission_time",
    when(admission_is_invalid, current_timestamp()).otherwise(admission_ts),
)

# An age outside 0-100 is replaced with a random integer in 1..90. Negative ages
# are treated as invalid too (previously only ages above 100 were caught).
# A null age matches neither test and stays null.
# NOTE(review): the replacement is a made-up value, so repaired rows no longer
# carry a real age -- confirm this is acceptable for downstream consumers.
age_value = col("age")
age_is_invalid = (age_value > 100) | (age_value < 0)
cleaned_df = cleaned_df.withColumn(
    "age",
    when(age_is_invalid, floor(rand() * 90 + 1).cast("int")).otherwise(age_value),
)

# Schema Evolution
# Expected Silver columns and the Spark SQL type each one has after cleaning.
expected_col_types = {
    "patient_id": "string",
    "gender": "string",
    "age": "int",
    "department": "string",
    "admission_time": "timestamp",
    "discharge_time": "timestamp",
    "bed_id": "int",
    "hospital_id": "int",
}
expected_cols = list(expected_col_types)

# Add any missing expected column as NULLs of the proper type. A bare lit(None)
# would create a NullType column, which Delta rejects on streaming writes.
present_cols = set(cleaned_df.columns)
for col_name, col_type in expected_col_types.items():
    if col_name not in present_cols:
        cleaned_df = cleaned_df.withColumn(col_name, lit(None).cast(col_type))

# Write to Silver layer
# The checkpoint location is the Silver path with a "_checkpoint" suffix.
checkpoint_path = silver_path + "_checkpoint"
(
    cleaned_df.writeStream
    .format("delta")
    .outputMode("append")
    .option("mergeSchema", True)
    .option("checkpointLocation", checkpoint_path)
    .start(silver_path)
)

<pyspark.sql.streaming.query.StreamingQuery at 0x7f74ead74910>

In [0]:
# Batch-read the Silver Delta table and render it for inspection.
silver_snapshot_df = spark.read.format("delta").load(silver_path)
display(silver_snapshot_df)

patient_id,gender,age,department,admission_time,discharge_time,bed_id,hospital_id
4782a152-1bbf-4c3d-8a31-86ed00c0e5fd,Male,35,ICU,2025-09-05T03:53:44.894612Z,2025-09-05T03:53:44.894612Z,427,2
2b46809f-be5a-46d2-8845-2544bb3a9f77,Female,55,Cardiology,2025-09-03T04:53:44.894694Z,2025-09-03T04:53:44.894694Z,387,3
1eb30331-8078-4f7c-b177-35d99b712dc3,Male,19,Cardiology,2025-09-03T00:53:44.894734Z,2025-09-03T00:53:44.894734Z,134,1
a7ab6dbf-4c89-4562-8d4e-aa3a5cebb87c,Female,96,Maternity,2025-09-05T00:53:44.894745Z,2025-09-05T00:53:44.894745Z,264,3
9608e306-9c93-430e-b1af-c591a7c98afd,Female,8,Oncology,2025-09-03T04:53:44.89471Z,2025-09-03T04:53:44.89471Z,190,2
1e9b8e64-61fe-4dd5-b071-42798b11fa82,Male,64,Oncology,2025-09-05T10:53:44.894722Z,2025-09-05T10:53:44.894722Z,192,2
9826f845-815b-4a0b-8b7e-ddcb4d2699f8,Female,60,Cardiology,2025-09-03T15:53:44.894757Z,2025-09-03T15:53:44.894757Z,51,7
b6177804-4427-40f8-94f2-015520edc922,Female,83,Surgery,2025-09-03T17:53:44.894768Z,2025-09-03T17:53:44.894768Z,247,2
44495925-9406-4450-9a90-99ebef202d6e,Female,59,Surgery,2025-09-04T03:53:44.89478Z,2025-09-04T03:53:44.89478Z,21,5
e2e5a678-0013-4d7a-8678-c6e298b3fbf0,Female,93,ICU,2025-09-04T06:53:44.894791Z,2025-09-04T06:53:44.894791Z,244,4
