In [1]:
# Root ABFSS location of the Silver lakehouse "Tables" folder in OneLake.
lakehouse_silverTable = "abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables"

# Store the path under a Spark conf key and read it straight back, so the
# value used from here on is the one the Spark session actually holds.
# A single key variable keeps the set/get pair from drifting apart.
_silver_conf_key = "spark.executorEnv.lakehouse_silverTable"
spark.conf.set(_silver_conf_key, lakehouse_silverTable)
lakehouse_silver_table = spark.conf.get(_silver_conf_key)

# Echo the resolved path so the cell output documents the write target.
print(lakehouse_silver_table)


StatementMeta(, 21a4aad2-5c9d-4538-95d4-7a10aeeb8321, 3, Finished, Available, Finished)

abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables


In [2]:
# Build the FactDiagnosis Delta table in the Silver layer from the raw
# encounters CSV. The whole cell is best-effort: any failure is reported and
# swallowed so the surrounding pipeline can move on to its next item.
try:
    from pyspark.sql.functions import to_timestamp, date_format, col, when, row_number, lower
    from pyspark.sql.window import Window

    # Load the raw encounters extract; the header row supplies the column names.
    enc_df = spark.read.option("header", "true").csv("Files/raw/encounters.csv")

    # Drop rows missing any field the fact table is built from.
    enc_df = enc_df.dropna(subset=['Id', 'PATIENT', 'REASONDESCRIPTION', 'START'])

    # Rename source columns to their fact-table names, parse START into a
    # timestamp, and derive a yyyyMMdd date key from it.
    # NOTE(review): a START value that does not match the pattern presumably
    # parses to null (non-ANSI mode), which would leave DIM_DateId null even
    # though the raw START passed the dropna above — confirm against the data.
    diag_df = enc_df \
        .withColumnRenamed("Id", "DIM_EncounterId") \
        .withColumnRenamed("PATIENT", "DIM_patientId") \
        .withColumnRenamed("REASONDESCRIPTION", "DIM_DiagnosisCode") \
        .withColumn("START", to_timestamp("START", "yyyy-MM-dd'T'HH:mm:ssX")) \
        .withColumn("DIM_DateId", date_format(col("START"), "yyyyMMdd"))

    # Helper column: lowercase diagnosis text so the LIKE checks below are
    # case-insensitive. It is not part of the final selection.
    diag_df = diag_df.withColumn("diagnosis_lower", lower(col("DIM_DiagnosisCode")))

    # Keyword-based severity score; the first matching branch wins, and any
    # diagnosis without a keyword falls back to 1.
    diag_df = diag_df.withColumn(
        "severity_score",
        when(col("diagnosis_lower").like("%severe%"), 9)
        .when(col("diagnosis_lower").like("%moderate%"), 5)
        .when(col("diagnosis_lower").like("%mild%"), 3)
        .otherwise(1)
    )

    # Number each patient's occurrences of the same diagnosis chronologically.
    # DIM_EncounterId is a tie-breaker: with START alone, rows sharing the same
    # timestamp would be numbered in arbitrary order, so diagnosis_rank could
    # change from one run to the next.
    windowSpec = Window.partitionBy("DIM_patientId", "DIM_DiagnosisCode") \
        .orderBy("START", "DIM_EncounterId")
    diag_df = diag_df.withColumn("diagnosis_rank", row_number().over(windowSpec))

    # Keep only the fact-table columns (drops START and diagnosis_lower).
    fact_diagnosis = diag_df.select(
        "DIM_EncounterId",
        "DIM_patientId",
        "DIM_DiagnosisCode",
        "DIM_DateId",
        "diagnosis_rank",
        "severity_score"
    )

    # Write to Silver layer, replacing any existing FactDiagnosis table.
    fact_diagnosis.write \
        .mode("overwrite") \
        .format("delta") \
        .save(f"{lakehouse_silver_table}/FactDiagnosis")

except Exception as e:
    # Deliberately non-fatal: report the failure and let the pipeline continue.
    # The traceback is printed as well so the failing step can be located;
    # str(e) alone does not say which line raised.
    import traceback

    print(f"❌ Notebook 'Fact_Diagnosis' failed: {str(e)} — Skipping to next item in pipeline.")
    traceback.print_exc()

StatementMeta(, 21a4aad2-5c9d-4538-95d4-7a10aeeb8321, 4, Finished, Available, Finished)