# In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window

def dim_program_progress(catalog_name: str):
    """Build and persist the ``dim_program_progress`` dimension table.

    Reads ``analytics_program_completions`` and ``analytics_program_overview``
    from ``{catalog_name}.cleaned_data``, labels every completion row as
    ``"Assigned"`` or ``"Unassigned"`` depending on whether its
    (program, user) pair is present in the overview table, adds a sequential
    ``program_user_id`` surrogate key, appends a synthetic ``N/A`` row
    (key ``-1``) and overwrites the Delta table
    ``{catalog_name}.schema.dim_program_progress``.

    The function uses a ``spark`` session that must already exist in the
    enclosing (notebook/module) scope.

    Args:
        catalog_name: Catalog that holds the source tables and receives the
            output table.

    Returns:
        The DataFrame that was written, including the ``N/A`` row.
    """
    # Load required source tables
    source_schema = f"{catalog_name}.cleaned_data"
    completions = spark.read.table(f"{source_schema}.analytics_program_completions")
    overview = spark.read.table(f"{source_schema}.analytics_program_overview")

    # Step 1: Base 'program_users' dataset with a "<user>_<program>" sort key.
    # NOTE(review): the key is only unique if completions holds one row per
    # (user, program) pair - confirm against the source table.
    program_users = completions.select(
        F.concat_ws(
            "_",
            F.col("User ID").cast("string"),
            F.col("Program ID").cast("string"),
        ).alias("program_user_id_str"),
        F.col("Program ID").alias("program_id"),
        F.col("User ID").alias("user_id"),
        F.col("Status").alias("status"),
        F.col("Time assigned").alias("time_assigned"),
        F.col("Time completed").alias("time_completed"),
        F.col("Time started").alias("time_started"),
    )

    # Step 2: Identify currently assigned (program, user) pairs.
    # The constant marker column is what lets us detect a join miss later:
    # joining on a list of column names keeps only ONE copy of each key
    # (the left one for a left join), so the keys themselves can never tell
    # us whether the overview side matched.
    currently_assigned = (
        overview.select(
            F.col("Program ID").alias("program_id"),
            F.col("User ID").alias("user_id"),
        )
        .distinct()
        .withColumn("_is_assigned", F.lit(True))
    )

    # Step 3: Join and compute final program progress logic
    joined = program_users.join(
        currently_assigned, ["program_id", "user_id"], how="left"
    )
    is_unassigned = F.col("_is_assigned").isNull()

    final_program_progress = joined.withColumn(
        # Unassigned rows without a completion time fall back to the
        # assignment time; every other row keeps its own value.
        "time_completed",
        F.when(
            is_unassigned,
            F.coalesce(F.col("time_completed"), F.col("time_assigned")),
        ).otherwise(F.col("time_completed")),
    ).withColumn(
        "assignmentstatus",
        F.when(is_unassigned, F.lit("Unassigned")).otherwise(F.lit("Assigned")),
    )

    # Step 4: Add program_user_id using row_number for a sequential ID.
    # A window without partitionBy moves every row into a single partition;
    # that is the price of a gap-free global sequence.
    window_spec = Window.orderBy("program_user_id_str")
    output_columns = [
        "program_user_id",
        "program_id",
        "user_id",
        "status",
        "time_assigned",
        "time_completed",
        "time_started",
        "assignmentstatus",
    ]
    final_program_progress = final_program_progress.withColumn(
        "program_user_id", F.row_number().over(window_spec)
    ).select(*output_columns)

    # Step 5: Append synthetic 'N/A' record.
    # The literal row is inferred as long/string columns. Cast it to the
    # schema of the real data first; otherwise the union widens mismatched
    # columns (e.g. timestamp + string -> string) and silently changes the
    # types stored in the dimension table.
    na_row = spark.createDataFrame(
        [(-1, -1, -1, "N/A", "1900-01-01", "1900-01-01", "1900-01-01", "N/A")],
        output_columns,
    )
    na_row = na_row.select(
        *[
            F.col(field.name).cast(field.dataType).alias(field.name)
            for field in final_program_progress.schema.fields
        ]
    )

    result = final_program_progress.unionByName(na_row)

    # Save to Delta
    # NOTE(review): "schema" is used literally as the target schema name -
    # confirm this is intended and not a placeholder.
    # NOTE(review): if the existing table was written with widened column
    # types, the first overwrite after this change may need the Delta
    # "overwriteSchema" option - confirm before deploying.
    output_path = f"{catalog_name}.schema.dim_program_progress"
    result.write.format("delta").mode("overwrite").saveAsTable(output_path)

    return result