In [0]:
from pyspark.sql.functions import (
    col, sum as _sum, current_timestamp
)

spark.conf.set(
    "fs.azure.account.key.healthcarestoragerk.dfs.core.windows.net",
    "xxxxx"
)

silver_base = "abfss://silver@healthcarestoragerk.dfs.core.windows.net"

# Paths
encounters_clean_path = f"{silver_base}/encounters_clean"
dim_patient_path      = f"{silver_base}/dim_patient"
dim_org_path          = f"{silver_base}/dim_organization"
claims_silver_path    = f"{silver_base}/claims"
claims_tx_silver_path = f"{silver_base}/claims_transactions"

# Load core tables
enc = spark.read.format("delta").load(encounters_clean_path)
dim_patient = spark.read.format("delta").load(dim_patient_path)
dim_org = spark.read.format("delta").load(dim_org_path)
claims_silver = spark.read.format("delta").load(claims_silver_path)
claims_tx_silver = spark.read.format("delta").load(claims_tx_silver_path)

# Pre-aggregate claim transactions per claim_id
claims_tx_agg = (
    claims_tx_silver
    .groupBy("claim_id")
    .agg(
        _sum("payments").alias("total_payments"),
        _sum("adjustments").alias("total_adjustments"),
        _sum("transfers").alias("total_transfers"),
        _sum("outstanding").alias("total_outstanding")
    )
)

# Join encounters with patients
fact = (
    enc.alias("e")
    .join(dim_patient.alias("p"), col("e.patient_id") == col("p.patient_id"), "left")
    .join(dim_org.alias("o"), col("e.organization_id") == col("o.organization_id"), "left")
    # join to claims on encounter_id
    .join(claims_silver.alias("c"), col("e.encounter_id") == col("c.encounter_id"), "left")
    # join to aggregated claim transactions
    .join(claims_tx_agg.alias("t"), col("c.claim_id") == col("t.claim_id"), "left")
    .withColumn("_fact_build_ts", current_timestamp())
)

# Choose a clean set of columns for the fact table
fact_encounter = fact.select(
    # keys
    col("e.encounter_id"),
    col("e.patient_id"),
    col("e.organization_id"),
    col("e.provider_id"),
    col("e.payer_id"),
    # encounter attributes
    col("e.gender"),
    col("e.age"),
    col("e.department"),
    col("e.admission_time"),
    col("e.discharge_time"),
    col("e.encounter_duration_minutes"),
    col("e.base_encounter_cost"),
    col("e.total_claim_cost"),
    col("e.payer_coverage"),
    # patient attributes
    col("p.FIRST").alias("patient_first_name"),
    col("p.LAST").alias("patient_last_name"),
    col("p.gender").alias("patient_gender"),
    col("p.race").alias("patient_race"),
    col("p.ethnicity").alias("patient_ethnicity"),
    col("p.birthdate").alias("patient_birthdate"),
    # organization attributes
    col("o.name").alias("organization_name"),
    col("o.city").alias("organization_city"),
    col("o.state").alias("organization_state"),
    # claims attributes
    col("c.claim_id"),
    col("c.current_illness_date"),
    col("c.service_date"),
    col("c.status_primary"),
    col("c.status_secondary"),
    col("c.status_patient"),
    col("c.outstanding_primary"),
    col("c.outstanding_secondary"),
    col("c.outstanding_patient"),
    # claim transaction aggregates
    col("t.total_payments"),
    col("t.total_adjustments"),
    col("t.total_transfers"),
    col("t.total_outstanding"),
    col("_fact_build_ts")
)

fact_path = f"{silver_base}/fact_encounter"
(
    fact_encounter.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .save(fact_path)
)

print(f"fact_encounter written to {fact_path} with {fact_encounter.count()} rows.")


fact_encounter written to abfss://silver@healthcarestoragerk.dfs.core.windows.net/fact_encounter with 16161 rows.


In [0]:
display(spark.read.format("delta").load("abfss://silver@healthcarestoragerk.dfs.core.windows.net/fact_encounter"))

encounter_id,patient_id,organization_id,provider_id,payer_id,gender,age,department,admission_time,discharge_time,encounter_duration_minutes,base_encounter_cost,total_claim_cost,payer_coverage,patient_first_name,patient_last_name,patient_gender,patient_race,patient_ethnicity,patient_birthdate,organization_name,organization_city,organization_state,claim_id,current_illness_date,service_date,status_primary,status_secondary,status_patient,outstanding_primary,outstanding_secondary,outstanding_patient,total_payments,total_adjustments,total_transfers,total_outstanding,_fact_build_ts
748f8357-6cc7-551d-f31a-32fa2cf84126,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,f7ae497d-8dc6-3721-9402-43b621a4e7d2,82608ebb-037c-3cef-9d34-3736d69b29e8,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,0,wellness,2019-02-17T05:07:38Z,2019-02-17T05:22:38Z,15.0,129.16,877.79,833.9,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,PCP14023,EAST LONGMEADOW,MA,e413105d-6f23-34ef-724a-b42adab9df22,2019-02-17T05:07:38Z,2019-02-17T05:07:38Z,CLOSED,,CLOSED,0.0,,0.0,877.79,,87.8,131.7,2025-12-28T04:16:41.860354Z
5a4735ae-423f-6563-28ab-b3d11b49b2d4,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,f7ae497d-8dc6-3721-9402-43b621a4e7d2,82608ebb-037c-3cef-9d34-3736d69b29e8,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,0,wellness,2019-03-24T05:07:38Z,2019-03-24T05:22:38Z,15.0,129.16,269.68,256.2,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,PCP14023,EAST LONGMEADOW,MA,8e430d76-6628-c3ac-8950-2acabeb34f86,2019-03-24T05:07:38Z,2019-03-24T05:07:38Z,CLOSED,,CLOSED,0.0,,0.0,269.67999999999995,,26.98,40.47,2025-12-28T04:16:41.860354Z
0bee1ce6-3e2c-5506-f71c-a7ba8f64a3d3,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,f7ae497d-8dc6-3721-9402-43b621a4e7d2,82608ebb-037c-3cef-9d34-3736d69b29e8,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,0,wellness,2019-05-26T05:07:38Z,2019-05-26T05:22:38Z,15.0,129.16,1292.75,1228.11,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,PCP14023,EAST LONGMEADOW,MA,3a69d5f0-26d8-fe66-4a1b-1a7c9ab618c8,2019-05-26T05:07:38Z,2019-05-26T05:07:38Z,CLOSED,,CLOSED,0.0,,0.0,1292.7499999999998,,129.32,193.98000000000005,2025-12-28T04:16:41.860354Z
6e93bcf9-45a4-8528-0120-1c1eaa930faf,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,f7ae497d-8dc6-3721-9402-43b621a4e7d2,82608ebb-037c-3cef-9d34-3736d69b29e8,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,0,wellness,2019-07-28T05:07:38Z,2019-07-28T05:22:38Z,15.0,129.16,1323.87,1257.68,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,PCP14023,EAST LONGMEADOW,MA,af33009b-e02a-6959-e5cc-5dffc7398cbd,2019-07-28T05:07:38Z,2019-07-28T05:07:38Z,CLOSED,,CLOSED,0.0,,0.0,1323.8699999999997,,132.44,198.66000000000005,2025-12-28T04:16:41.860354Z
8b6787c3-4316-a0cb-899d-4746525c319f,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,f7ae497d-8dc6-3721-9402-43b621a4e7d2,82608ebb-037c-3cef-9d34-3736d69b29e8,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,0,wellness,2019-10-27T05:07:38Z,2019-10-27T05:22:38Z,15.0,129.16,831.76,790.17,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,PCP14023,EAST LONGMEADOW,MA,3153e655-a843-2bf1-72eb-1f06a52e524a,2019-10-27T05:07:38Z,2019-10-27T05:07:38Z,CLOSED,,CLOSED,0.0,,0.0,831.7599999999999,,83.22,124.83,2025-12-28T04:16:41.860354Z
8f424287-ee3a-c144-bc1d-3ba926e93fd5,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,f7ae497d-8dc6-3721-9402-43b621a4e7d2,82608ebb-037c-3cef-9d34-3736d69b29e8,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,0,wellness,2020-01-26T05:07:38Z,2020-01-26T05:22:38Z,15.0,129.16,129.16,122.7,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,PCP14023,EAST LONGMEADOW,MA,3d8c5921-885e-740d-8a14-3ee3e1118df7,2020-01-26T05:07:38Z,2020-01-26T05:07:38Z,CLOSED,,CLOSED,0.0,,0.0,129.16,,12.92,19.38,2025-12-28T04:16:41.860354Z
fb15e123-fea7-cae8-6d49-ee9d2a85fc84,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,5103c940-0c08-392f-95cd-446e0cea042a,9f529022-166f-3db4-9f92-ff1ba11ee565,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,0,ambulatory,2020-02-05T06:07:38Z,2020-02-05T06:22:38Z,15.0,77.49,77.49,16.62,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,SHRINERS' HOSPITAL FOR CHILDREN (THE),SPRINGFIELD,MA,23b8db87-5ad9-387e-879e-4c2ebba99ab0,2020-02-05T06:07:38Z,2020-02-05T06:07:38Z,CLOSED,,CLOSED,0.0,,0.0,77.49000000000001,,1.74,20.1,2025-12-28T04:16:41.860354Z
01efcc52-15d6-51e9-faa2-bee069fcbe44,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,5103c940-0c08-392f-95cd-446e0cea042a,9f529022-166f-3db4-9f92-ff1ba11ee565,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,1,ambulatory,2020-02-17T10:07:38Z,2020-02-17T10:40:32Z,32.9,77.49,594.14,507.43,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,SHRINERS' HOSPITAL FOR CHILDREN (THE),SPRINGFIELD,MA,fb2d52cb-74c6-593f-fd59-a99caf182dcc,2020-02-17T10:07:38Z,2020-02-17T10:07:38Z,CLOSED,,CLOSED,0.0,,0.0,,,,,2025-12-28T04:16:41.860354Z
01efcc52-15d6-51e9-faa2-bee069fcbe44,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,5103c940-0c08-392f-95cd-446e0cea042a,9f529022-166f-3db4-9f92-ff1ba11ee565,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,1,ambulatory,2020-02-17T10:07:38Z,2020-02-17T10:40:32Z,32.9,77.49,594.14,507.43,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,SHRINERS' HOSPITAL FOR CHILDREN (THE),SPRINGFIELD,MA,350c3a2c-9262-c79b-1920-71250474a0e1,2020-02-17T10:07:38Z,2020-02-17T10:07:38Z,CLOSED,,CLOSED,0.0,,0.0,,,,,2025-12-28T04:16:41.860354Z
01efcc52-15d6-51e9-faa2-bee069fcbe44,b9c610cd-28a6-4636-ccb6-c7a0d2a4cb85,5103c940-0c08-392f-95cd-446e0cea042a,9f529022-166f-3db4-9f92-ff1ba11ee565,7c4411ce-02f1-39b5-b9ec-dfbea9ad3c1a,M,1,ambulatory,2020-02-17T10:07:38Z,2020-02-17T10:40:32Z,32.9,77.49,594.14,507.43,Damon455,Langosh790,M,white,nonhispanic,2019-02-17,SHRINERS' HOSPITAL FOR CHILDREN (THE),SPRINGFIELD,MA,988099b8-dad2-f862-35cb-c76cfffc046f,2020-02-17T10:07:38Z,2020-02-17T10:07:38Z,CLOSED,,CLOSED,0.0,,0.0,594.14,,53.39999999999999,97.59,2025-12-28T04:16:41.860354Z
