In [3]:
# OneLake (abfss) path to the Tables folder of the Silver lakehouse.
lakehouse_silverTable = "abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables"

# Publish the path through the Spark session configuration, then read it back
# so the value used below is the one the session actually holds.
# NOTE(review): spark.executorEnv.* set after session start presumably does not
# reach already-running executors — confirm this is only used driver-side.
_silver_conf_key = "spark.executorEnv.lakehouse_silverTable"
spark.conf.set(_silver_conf_key, lakehouse_silverTable)
lakehouse_silver_table = spark.conf.get(_silver_conf_key)
print(lakehouse_silver_table)


StatementMeta(, 32fdd67e-6294-4b14-bee7-c5cbb16eb322, 4, Finished, Available, Finished)

abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables


In [5]:
# Build the FactBilling Delta table from Files/raw/encounters.csv.
#
# Steps: read the CSV, drop rows missing a required column, rename the key
# columns to DIM_* names, derive a yyyyMMdd date key and the monetary measures,
# then overwrite <lakehouse_silver_table>/FactBilling.
#
# Failure handling is best-effort on purpose: any exception is reported and the
# cell finishes normally so the surrounding pipeline can continue. The full
# traceback is printed as well, so a failed run can still be diagnosed.
try:
    from pyspark.sql.functions import to_timestamp, date_format, col, row_number, expr, greatest, when, coalesce, lit
    from pyspark.sql.window import Window

    # Load encounters.csv (all columns arrive as strings; first row is the header)
    enc_df = spark.read.option("header", "true").csv("Files/raw/encounters.csv")

    # Drop rows with required values missing
    enc_df = enc_df.dropna(subset=[
        'Id', 'PATIENT', 'START', 'PAYER', 'ORGANIZATION',
        'TOTAL_CLAIM_COST', 'BASE_ENCOUNTER_COST', 'PAYER_COVERAGE'
    ])

    billing_df = (
        enc_df
        # Rename source columns to the dimension-key names used by the fact table
        .withColumnRenamed("Id", "DIM_EncounterId")
        .withColumnRenamed("PATIENT", "DIM_patientId")
        .withColumnRenamed("PAYER", "DIM_payerId")
        .withColumnRenamed("ORGANIZATION", "DIM_providerId")
        .withColumnRenamed("CODE", "DIM_ProcedureCode")
        # Parse START as an ISO-8601 timestamp with zone offset, then derive the date key.
        # NOTE(review): a START that does not match the pattern presumably yields a
        # null START_TS and therefore a null DIM_DateId — confirm this is acceptable.
        .withColumn("START_TS", to_timestamp("START", "yyyy-MM-dd'T'HH:mm:ssX"))
        .withColumn("DIM_DateId", date_format(col("START_TS"), "yyyyMMdd"))
        # NOTE(review): the diagnosis *code* is filled from REASONDESCRIPTION, not a
        # code column — confirm this matches the key of the diagnosis dimension.
        .withColumn("DIM_DiagnosisCode", coalesce(col("REASONDESCRIPTION"), lit("UNKNOWN")))
        # Convert amounts to double; anything that is null after the cast becomes 0.0
        .withColumn("billed_amount", coalesce(col("TOTAL_CLAIM_COST").cast("double"), lit(0.0)))
        .withColumn("paid_amount", coalesce(col("PAYER_COVERAGE").cast("double"), lit(0.0)))
        .withColumn("co_pay", coalesce(col("BASE_ENCOUNTER_COST").cast("double"), lit(0.0)))
        # deductible = billed - paid - co_pay, floored at 0.0 so it is never negative
        .withColumn(
            "deductible",
            greatest(col("billed_amount") - col("paid_amount") - col("co_pay"), lit(0.0))
        )
    )

    # ✅ Save to Silver (replaces both the data and the schema of any existing table)
    billing_df.write.mode("overwrite").format("delta").option("overwriteSchema", "true").save(f"{lakehouse_silver_table}/FactBilling")

except Exception as e:
    # Deliberately non-fatal: report the failure and let the pipeline move on.
    # The traceback is emitted too; the one-line message alone hides where the
    # failure occurred.
    import traceback

    print(f" Notebook failed: {str(e)} — Skipping to next item in pipeline.")
    traceback.print_exc()

StatementMeta(, 32fdd67e-6294-4b14-bee7-c5cbb16eb322, 6, Finished, Available, Finished)