In [1]:
# ABFSS path of the "Tables" folder of the Silver lakehouse in OneLake.
lakehouse_silverTable = "abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables"

# Store the path in the Spark session configuration and read it back, so the
# value used by later cells is the one the session actually holds.
silver_conf_key = "spark.executorEnv.lakehouse_silverTable"
spark.conf.set(silver_conf_key, lakehouse_silverTable)
lakehouse_silver_table = spark.conf.get(silver_conf_key)

# Echo the resolved path so it is visible in the cell output.
print(lakehouse_silver_table)


StatementMeta(, 1b2ad0a2-3b4d-44c2-b246-ea9b11b08775, 3, Finished, Available, Finished)

abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables


In [2]:
try:
    from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType
    from pyspark.sql.functions import col

    # Explicit read schema as (column name, Spark type) pairs; every column is
    # declared nullable.
    lab_result_columns = [
        ("Fact_LabResultId", IntegerType()),
        ("DIM_patientId", StringType()),
        ("DIM_EncounterId", StringType()),
        ("DIM_LabTestCode", StringType()),
        ("DIM_DateId", StringType()),
        ("result_value", DoubleType()),
        ("units", StringType()),
        ("reference_range", StringType()),
    ]
    fact_lab_schema = StructType(
        [StructField(name, dtype, True) for name, dtype in lab_result_columns]
    )

    # Read the raw CSV (first row is a header) using the schema above instead
    # of letting Spark infer the types.
    fact_lab_df = (
        spark.read
        .schema(fact_lab_schema)
        .option("header", True)
        .csv("Files/raw/Data_LabResults.csv")
    )

    # Keep only rows where both the patient key and the encounter key are set.
    has_patient_id = col("DIM_patientId").isNotNull()
    has_encounter_id = col("DIM_EncounterId").isNotNull()
    fact_lab_df_clean = fact_lab_df.where(has_patient_id & has_encounter_id)

    # Replace the FactLabResults Delta table under the Silver Tables path.
    (
        fact_lab_df_clean.write
        .format("delta")
        .mode("overwrite")
        .save(f"{lakehouse_silver_table}/FactLabResults")
    )

except Exception as e:
    # Best-effort by design: the error is printed and not re-raised, so this
    # cell finishes without raising even when the load or write fails.
    # NOTE(review): confirm the calling pipeline does not need a hard failure here.
    print(f"❌ Notebook 'Fact_LabResults' failed: {str(e)} — Skipping to next item in pipeline.")

StatementMeta(, 1b2ad0a2-3b4d-44c2-b246-ea9b11b08775, 4, Finished, Available, Finished)