In [0]:
from pyspark.sql.functions import current_timestamp, input_file_name

# ADLS Gen2 configuration
#
# The storage account key is fetched from a Databricks secret scope at run time
# rather than being embedded in the notebook, so it does not end up in source
# control or in exported copies of this notebook.
# NOTE(review): the scope and key names below are placeholders — point them at
# the secret scope that actually holds this account's key before running.

spark.conf.set(
    "fs.azure.account.key.healthcarestoragerk.dfs.core.windows.net",
    dbutils.secrets.get(scope="healthcare-kv", key="healthcarestoragerk-account-key")
)


# Storage roots: RAW is where the source CSV files are read from,
# BRONZE is where the Delta copies are written.

raw_base_path    = "abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea"
bronze_base_path = "abfss://bronze@healthcarestoragerk.dfs.core.windows.net/reference"

# Logical table name -> CSV filename. Every file is named "<table>.csv",
# so the mapping is derived from the ordered list of table names.
tables = {
    name: f"{name}.csv"
    for name in (
        "patients",
        "conditions",
        "organizations",
        "claims",
        "claims_transactions",
    )
}

# Collects (table_name, bronze_path) for every table written, for the summary below.
loaded_tables = []

for table_name, filename in tables.items():
    # Source CSV location and target Delta location for this table
    raw_path    = raw_base_path + "/" + filename
    bronze_path = bronze_base_path + "/" + table_name + "_bronze"

    print(f"\n Loading {table_name} from {raw_path}")

    # Parse the CSV (first row is the header, column types are inferred),
    # then tag each row with the load time and the file it came from.
    csv_reader = spark.read.options(header="true", inferSchema="true")
    df = (
        csv_reader.csv(raw_path)
        .withColumn("_ingestion_timestamp", current_timestamp())
        .withColumn("_source_file", input_file_name())
    )

    # count() is a Spark action, so this scans the CSV before the write does.
    row_count = df.count()
    print(f"{table_name}: {row_count} rows read.")

    # Full reload: replace whatever is at the Bronze path, including its schema.
    df.write.save(
        bronze_path,
        format="delta",
        mode="overwrite",
        overwriteSchema="true",
    )

    print(f"{table_name}: written to Bronze at {bronze_path}")
    loaded_tables.append((table_name, bronze_path))

# Summary of everything written in this run
print("\nBronze batch load complete")
for loaded_name, loaded_path in loaded_tables:
    print(f" - {loaded_name}_bronze -> {loaded_path}")



 Loading patients from abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/patients.csv
patients: 1163 rows read.
patients: written to Bronze at abfss://bronze@healthcarestoragerk.dfs.core.windows.net/reference/patients_bronze

 Loading conditions from abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/conditions.csv
conditions: 38094 rows read.
conditions: written to Bronze at abfss://bronze@healthcarestoragerk.dfs.core.windows.net/reference/conditions_bronze

 Loading organizations from abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
organizations: 1127 rows read.
organizations: written to Bronze at abfss://bronze@healthcarestoragerk.dfs.core.windows.net/reference/organizations_bronze

 Loading claims from abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/claims.csv
claims: 117889 rows read.
claims: written to Bronze at abfss://bronze@healthcarestoragerk.dfs.core.windows.net/reference/claims_bronze

 Loading claims_transac

In [0]:
# Read the organizations Bronze table back from Delta and render it for inspection.
org_bronze_path = "abfss://bronze@healthcarestoragerk.dfs.core.windows.net/reference/organizations_bronze"
org_df = spark.read.load(org_bronze_path, format="delta")
display(org_df)

Id,NAME,ADDRESS,CITY,STATE,ZIP,LAT,LON,PHONE,REVENUE,UTILIZATION,_ingestion_timestamp,_source_file
ef58ea08-d883-3957-8300-150554edc8fb,HEALTHALLIANCE HOSPITALS INC,60 HOSPITAL ROAD,LEOMINSTER,MA,01453,42.520838,-71.770876,9784662000,0.0,1214,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
69176529-fd1f-3b3f-abce-a0a3626769eb,MOUNT AUBURN HOSPITAL,330 MOUNT AUBURN STREET,CAMBRIDGE,MA,02138,42.375967,-71.118275,6174923500,0.0,2877,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
5e765f2b-e908-3888-9fc7-df2cb87beb58,STURDY MEMORIAL HOSPITAL,211 PARK STREET,ATTLEBORO,MA,02703,41.931653,-71.294503,5082225200,0.0,2365,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
f1fbcbfb-fcfa-3bd2-b7f4-df20f1b3c3a4,LAWRENCE GENERAL HOSPITAL,ONE GENERAL STREET,LAWRENCE,MA,01842,42.700273,-71.161357,9786834000,0.0,976,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
e002090d-4e92-300e-b41e-7d1f21dee4c6,CAMBRIDGE HEALTH ALLIANCE,1493 CAMBRIDGE STREET,CAMBRIDGE,MA,02138,42.375967,-71.118275,6176652300,0.0,2706,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
ef6ab57c-ed94-3dbe-9861-812d515918b3,CAPE COD HOSPITAL,88 LEWIS BAY ROAD,HYANNIS,MA,02601,41.748854,-70.74053599999998,5087711800,0.0,2071,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
49318f80-bd8b-3fc7-a096-ac43088b0c12,COOLEY DICKINSON HOSPITAL INC THE,30 LOCUST STREET,NORTHAMPTON,MA,01060,42.327044,-72.67463000000002,4135822000,0.0,1533,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
fbf6180e-b800-3ebe-b91d-93d0288c400e,BAYSTATE FRANKLIN MEDICAL CENTER,164 HIGH STREET,GREENFIELD,MA,01301,42.614671,-72.597063,4137730211,0.0,1754,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
8b58cdd1-3d79-3126-8fe0-da2c54d6805c,CARNEY HOSPITAL,2100 DORCHESTER AVENUE,BOSTON,MA,02124,42.33196,-71.020173,6175062000,0.0,576,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
4bdaa4c2-c664-3089-aee2-7137abbad27f,HARRINGTON MEMORIAL HOSPITAL-1,100 SOUTH STREET,SOUTHBRIDGE,MA,01550,42.059669,-72.03404,5087659771,0.0,667,2025-12-28T01:57:35.427435Z,abfss://raw@healthcarestoragerk.dfs.core.windows.net/synthea/organizations.csv
