In [1]:
lakehouse_silverTable = "abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables"
spark.conf.set("spark.executorEnv.lakehouse_silverTable", lakehouse_silverTable)
lakehouse_silver_table = spark.conf.get("spark.executorEnv.lakehouse_silverTable")
print(lakehouse_silver_table)


StatementMeta(, 831e43fc-6a27-47f8-90b8-141e0a8fae07, 3, Finished, Available, Finished)

abfss://Fabric_E2E@onelake.dfs.fabric.microsoft.com/Lakehouse_Silver_.Lakehouse/Tables


In [2]:
try:
    from pyspark.sql.functions import to_timestamp, date_format, col, unix_timestamp, row_number, lit
    from pyspark.sql.window import Window

    # Load encounters.csv
    enc_df = spark.read.option("header", "true").csv("Files/raw/encounters.csv")

    # Drop rows with critical nulls
    enc_df = enc_df.dropna(subset=['Id', 'PATIENT', 'START', 'STOP', 'ORGANIZATION'])

    # Transform and rename
    appt_df = enc_df \
        .withColumnRenamed("Id", "DIM_EncounterId") \
        .withColumnRenamed("PATIENT", "DIM_patientId") \
        .withColumnRenamed("ORGANIZATION", "DIM_providerId") \
        .withColumn("START_TS", to_timestamp("START", "yyyy-MM-dd'T'HH:mm:ssX")) \
        .withColumn("STOP_TS", to_timestamp("STOP", "yyyy-MM-dd'T'HH:mm:ssX")) \
        .withColumn("DIM_DateId", date_format(col("START_TS"), "yyyyMMdd"))

    # Calculate duration in minutes
    appt_df = appt_df.withColumn(
        "duration_minutes",
        (unix_timestamp("STOP_TS") - unix_timestamp("START_TS")) / 60
    )

    # Add NULL wait_time (scheduled time not available)
    appt_df = appt_df.withColumn("wait_time", lit(None).cast("double"))

    # Approximate appointment status
    appt_df = appt_df.withColumn("status", col("DESCRIPTION"))

    # Use provider as department (fallback)
    appt_df = appt_df.withColumn("DIM_departmentId", col("DIM_providerId"))

    # Add surrogate key
    windowSpec = Window.orderBy("DIM_patientId", "DIM_DateId")
    appt_df = appt_df.withColumn("Fact_AppointmentId", row_number().over(windowSpec))

    raise Exception("💣 Simulated error for testing skip behavior")


    # Final selection
    fact_appointments = appt_df.select(
        "Fact_AppointmentId",
        "DIM_patientId",
        "DIM_providerId",
        "DIM_departmentId",
        "DIM_DateId",
        "wait_time",
        "duration_minutes",
        "status"
    )

    # Save to Silver layer
    fact_appointments.write \
        .mode("overwrite") \
        .format("delta") \
        .save(f"{lakehouse_silver_table}/FactAppointments")

except Exception as e:
    print(f"❌ Notebook 'Fact_Appointments' failed: {str(e)} — Skipping to next item in pipeline.")


StatementMeta(, 831e43fc-6a27-47f8-90b8-141e0a8fae07, 4, Finished, Available, Finished)

❌ Notebook 'Fact_Appointments' failed: 💣 Simulated error for testing skip behavior — Skipping to next item in pipeline.
