In [1]:
# Spark conf key under which the Silver "Tables" root path is stored, so the
# same key string is used for both the write and the read-back below.
_SILVER_TABLE_CONF_KEY = "spark.executorEnv.lakehouse_silverTable"

# ABFSS path of the Silver lakehouse "Tables" folder.
lakehouse_silverTable = "abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables"

# Store the path in the session conf, then read it back from there so later
# cells use the value held by the Spark session rather than the local literal.
spark.conf.set(_SILVER_TABLE_CONF_KEY, lakehouse_silverTable)
lakehouse_silver_table = spark.conf.get(_SILVER_TABLE_CONF_KEY)

# Echo the resolved path for a quick visual check in the cell output.
print(lakehouse_silver_table)


StatementMeta(, cdc93cc4-6ac7-4438-97ae-142997d2a9e8, 3, Finished, Available, Finished)

abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables


In [2]:
# Build the FactVitals Delta table in the Silver layer from the raw CSV:
# load -> drop rows without a usable vital -> assign surrogate key -> cast -> write.
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType
from pyspark.sql.functions import col, row_number
from pyspark.sql.window import Window

try:
    # Step 1: Target schema of the fact table. This is the single source of
    # truth for the output column names and types used in Step 5.
    vitals_schema = StructType([
        StructField("Fact_VitalId", IntegerType(), True),
        StructField("DIM_patientId", StringType(), True),
        StructField("DIM_DateId", StringType(), True),
        StructField("vital_type", StringType(), True),
        StructField("vital_value", DoubleType(), True),
        StructField("unit", StringType(), True)
    ])

    # Target columns whose source column carries a different name in the CSV.
    source_column_for = {"unit": "units"}

    # Step 2: Load CSV (not enforcing schema because of extra columns).
    # Without a schema or inferSchema every column is read as a string.
    # NOTE(review): the file is named Data_LabResults.csv although this cell
    # builds vitals -- confirm this is the intended source file.
    vitals_df = spark.read.option("header", True).csv("Files/raw/Data_LabResults.csv")

    # Step 3: Keep only rows with a vital type and a numeric vital value.
    # The null check is done on the value *after* casting to double: a
    # non-numeric string (e.g. "N/A") is not null as text but becomes NULL
    # once cast, and would otherwise reach the fact table as a NULL measure.
    vitals_filtered = vitals_df.filter(
        col("vital_type").isNotNull() & col("vital_value").cast("double").isNotNull()
    )

    # Step 4: Add surrogate key.
    # (DIM_patientId, DIM_DateId) alone can tie when one patient has several
    # readings on the same date, which would let row_number() hand out keys in
    # a different order on each run; the extra columns act as tie-breakers.
    # NOTE(review): a window with no partitionBy makes Spark move all rows to
    # a single partition -- acceptable for small inputs, revisit if this grows.
    windowSpec = Window.orderBy(
        "DIM_patientId", "DIM_DateId", "vital_type", "vital_value", "units"
    )
    vitals_ranked = vitals_filtered.withColumn("Fact_VitalId", row_number().over(windowSpec))

    # Step 5: Project and cast to the target schema, renaming source columns
    # where the target name differs (units -> unit).
    fact_vitals_df = vitals_ranked.select(
        *[
            col(source_column_for.get(field.name, field.name))
            .cast(field.dataType)
            .alias(field.name)
            for field in vitals_schema.fields
        ]
    )

    # Step 6: Write to Silver layer, replacing any previous contents.
    fact_vitals_df.write \
        .mode("overwrite") \
        .format("delta") \
        .save(f"{lakehouse_silver_table}/FactVitals")

except Exception as e:
    # Deliberate best-effort: report the failure and let the cell finish so the
    # surrounding pipeline can continue with its next item.
    print(f"❌ Notebook 'Fact_Vitals' failed: {str(e)} — Skipping to next item in pipeline.")

StatementMeta(, cdc93cc4-6ac7-4438-97ae-142997d2a9e8, 4, Finished, Available, Finished)