In [0]:
from pyspark.sql.functions import *

# Azure Event Hubs configuration
#
# The hub is reached on port 9093 of the namespace host using SASL_SSL with
# the PLAIN mechanism. The SASL username is the literal text
# "$ConnectionString" and the password is the namespace connection string.

event_hub_namespace = "healthcare-analytics-namespace.servicebus.windows.net"
event_hub_name = "healthcare-analytics-eh"

# The connection string embeds a shared access key, so it is fetched from a
# Databricks secret scope at run time instead of being pasted into the
# notebook source (notebooks get exported, shared and version-controlled).
# NOTE(review): the scope/key names below are placeholders - create them, or
# point these arguments at an existing scope, before running this cell.
# NOTE(review): the previous inline value used the namespace-wide
# RootManageSharedAccessKey policy; a Listen-only policy scoped to this hub
# is sufficient for a consumer - confirm and rotate the exposed key.
event_hub_conn_str = dbutils.secrets.get(
    scope="healthcare-analytics",
    key="eventhub-connection-string",
)

# Kafka / Event Hubs options
kafka_options = {
    "kafka.bootstrap.servers": f"{event_hub_namespace}:9093",
    "subscribe": event_hub_name,
    "kafka.security.protocol": "SASL_SSL",
    "kafka.sasl.mechanism": "PLAIN",
    # The 'kafkashaded.' prefix is the class path of the Kafka client as
    # bundled by Databricks runtimes.
    "kafka.sasl.jaas.config": (
        f'kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required '
        f'username="$ConnectionString" password="{event_hub_conn_str}";'
    ),
    # Only consulted when the query starts without an existing checkpoint.
    "startingOffsets": "earliest",
    # Keep the stream running if requested offsets are no longer available.
    "failOnDataLoss": "false"
}

# Open a streaming read against the event hub through Spark's Kafka source,
# configured entirely from kafka_options.
event_hub_reader = spark.readStream.format("kafka").options(**kafka_options)
raw_df = event_hub_reader.load()

# Cast the binary 'value' column to a string and expose it as the single
# column 'raw_json'; no other source columns are selected.
json_df = raw_df.selectExpr("CAST(value AS STRING) as raw_json")

# ADLS Gen2 configuration
#
# Register the storage account key on the Spark session so the abfss://
# paths below can be accessed. The key is read from a Databricks secret
# scope at run time rather than being embedded in the notebook source.
# NOTE(review): the scope/key names are placeholders - create them, or point
# these arguments at an existing scope, before running this cell.
spark.conf.set(
    "fs.azure.account.key.healthcarestoragerk.dfs.core.windows.net",
    dbutils.secrets.get(scope="healthcare-analytics", key="adls-account-key"),
)

# Bronze path for raw encounter events in the Data Lake, plus the location
# passed to the streaming writer as its checkpoint directory.
bronze_path = "abfss://bronze@healthcarestoragerk.dfs.core.windows.net/encounters_raw"
checkpoint_path = "abfss://bronze@healthcarestoragerk.dfs.core.windows.net/_checkpoints/encounters_raw"

# Append the raw JSON stream to the Bronze Delta location.
# The writer is built step by step: sink format and output mode first, then
# the checkpoint directory, and finally the query is started on bronze_path.
bronze_writer = json_df.writeStream.format("delta").outputMode("append")
bronze_writer = bronze_writer.option("checkpointLocation", checkpoint_path)
query = bronze_writer.start(bronze_path)

# Report the ID of the query that was just started.
print("Streaming query started. ID:", query.id)

Streaming query started. ID: 44b855ec-bbca-43d9-a85c-2e0c4c8b3050


In [0]:
# Batch-read the Bronze Delta location and render the result.
bronze_snapshot_df = spark.read.format("delta").load(bronze_path)
display(bronze_snapshot_df)

raw_json
"{""encounter_id"": ""748f8357-6cc7-551d-f31a-32fa2cf84126"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 0, ""department"": ""wellness"", ""admission_time"": ""2019-02-17T05:07:38Z"", ""discharge_time"": ""2019-02-17T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 877.79, ""payer_coverage"": 833.9}"
"{""encounter_id"": ""5a4735ae-423f-6563-28ab-b3d11b49b2d4"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 0, ""department"": ""wellness"", ""admission_time"": ""2019-03-24T05:07:38Z"", ""discharge_time"": ""2019-03-24T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 269.68, ""payer_coverage"": 256.2}"
"{""encounter_id"": ""0bee1ce6-3e2c-5506-f71c-a7ba8f64a3d3"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 0, ""department"": ""wellness"", ""admission_time"": ""2019-05-26T05:07:38Z"", ""discharge_time"": ""2019-05-26T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 1292.75, ""payer_coverage"": 1228.11}"
"{""encounter_id"": ""6e93bcf9-45a4-8528-0120-1c1eaa930faf"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 0, ""department"": ""wellness"", ""admission_time"": ""2019-07-28T05:07:38Z"", ""discharge_time"": ""2019-07-28T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 1323.87, ""payer_coverage"": 1257.68}"
"{""encounter_id"": ""8b6787c3-4316-a0cb-899d-4746525c319f"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 0, ""department"": ""wellness"", ""admission_time"": ""2019-10-27T05:07:38Z"", ""discharge_time"": ""2019-10-27T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 831.76, ""payer_coverage"": 790.17}"
"{""encounter_id"": ""8f424287-ee3a-c144-bc1d-3ba926e93fd5"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 0, ""department"": ""wellness"", ""admission_time"": ""2020-01-26T05:07:38Z"", ""discharge_time"": ""2020-01-26T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 129.16, ""payer_coverage"": 122.7}"
"{""encounter_id"": ""fb15e123-fea7-cae8-6d49-ee9d2a85fc84"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 0, ""department"": ""ambulatory"", ""admission_time"": ""2020-02-05T06:07:38Z"", ""discharge_time"": ""2020-02-05T06:22:38Z"", ""organization_id"": ""5103c940-0c08-392f-95cd-446e0cea042a"", ""provider_id"": ""9f529022-166f-3db4-9f92-ff1ba11ee565"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 77.49, ""total_claim_cost"": 77.49, ""payer_coverage"": 16.62}"
"{""encounter_id"": ""01efcc52-15d6-51e9-faa2-bee069fcbe44"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 1, ""department"": ""ambulatory"", ""admission_time"": ""2020-02-17T10:07:38Z"", ""discharge_time"": ""2020-02-17T10:40:32Z"", ""organization_id"": ""5103c940-0c08-392f-95cd-446e0cea042a"", ""provider_id"": ""9f529022-166f-3db4-9f92-ff1ba11ee565"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 77.49, ""total_claim_cost"": 594.14, ""payer_coverage"": 507.43}"
"{""encounter_id"": ""1a7debfc-9582-7f23-a109-4f154a182ee2"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 1, ""department"": ""wellness"", ""admission_time"": ""2025-12-28T01:39:33+00:00Z"", ""discharge_time"": ""2020-04-26T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 1614.96, ""payer_coverage"": 1534.21}"
"{""encounter_id"": ""bf38c711-941f-7509-f9ec-b864d6929f3f"", ""patient_id"": ""b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85"", ""gender"": ""M"", ""age"": 1, ""department"": ""wellness"", ""admission_time"": ""2020-07-26T05:07:38Z"", ""discharge_time"": ""2020-07-26T05:22:38Z"", ""organization_id"": ""f7ae497d-8dc6-3721-9402-43b621a4e7d2"", ""provider_id"": ""82608ebb-037c-3cef-9d34-3736d69b29e8"", ""payer_id"": ""7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a"", ""base_encounter_cost"": 129.16, ""total_claim_cost"": 269.68, ""payer_coverage"": 256.2}"


In [0]:
# Print the streaming query's current status followed by its most recent
# progress report. Each attribute is read immediately before it is printed.
for label, attr_name in (("STATUS:", "status"), ("LAST PROGRESS:", "lastProgress")):
    print(label, getattr(query, attr_name))

STATUS: {'message': 'Processing new data', 'isDataAvailable': True, 'isTriggerActive': True}
LAST PROGRESS: {'id': 'dee96e3e-3792-4d72-a921-aebedd969224', 'runId': '4426e6d2-2879-4157-a784-80d5be8eb2fe', 'name': None, 'timestamp': '2025-12-27T01:55:55.778Z', 'batchId': 287, 'batchDuration': 1046, 'numInputRows': 1, 'inputRowsPerSecond': 1.040582726326743, 'processedRowsPerSecond': 0.9560229445506692, 'durationMs': {'addBatch': 343, 'commitBatch': 384, 'commitOffsets': 131, 'getBatch': 0, 'latestOffset': 16, 'queryPlanning': 13, 'triggerExecution': 1046, 'walCommit': 149}, 'stateOperators': [], 'sources': [{'description': 'KafkaV2[Subscribe[healthcare-analytics-eh]]', 'startOffset': {'healthcare-analytics-eh': {'0': 3024}}, 'endOffset': {'healthcare-analytics-eh': {'0': 3025}}, 'latestOffset': {'healthcare-analytics-eh': {'0': 3025}}, 'numInputRows': 1, 'inputRowsPerSecond': 1.040582726326743, 'processedRowsPerSecond': 0.9560229445506692, 'metrics': {'avgOffsetsBehindLatest': '0.0', 'es