In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

def dim_programs(catalog_name):
    """Build the programs dimension and persist it as a Delta table.

    Reads ``<catalog_name>.cleaned_data.analytics_program_completions``,
    reduces it to one row per "Program ID" (taking the per-column maximum of
    the program name, category and dashboard flag), appends a placeholder
    row keyed ``-1`` whose text attributes are "N/A", and overwrites
    ``<catalog_name>.schema.dim_programs`` with the combined rows.

    A ``spark`` session is used but not created in this function; it must
    already exist in the surrounding scope.

    Args:
        catalog_name: Catalog prefix applied to both the source and the
            target table names.

    Returns:
        The DataFrame that was written: the aggregated programs plus the
        placeholder row.
    """
    source_table = f"{catalog_name}.cleaned_data.analytics_program_completions"
    target_table = f"{catalog_name}.schema.dim_programs"

    # (source column, output column) pairs reduced with max() per program.
    attribute_columns = (
        ("Program Name", "program_name"),
        ("Category", "category"),
        ("Include in Dashboard", "include_in_dashboard"),
    )
    max_per_attribute = [
        F.max(source).alias(target) for source, target in attribute_columns
    ]

    completions = spark.read.table(source_table)

    # One row per program; the grouping key is renamed after aggregation so
    # it lines up with the placeholder row's column names below.
    per_program = completions.groupBy("Program ID").agg(*max_per_attribute)
    per_program = per_program.withColumnRenamed("Program ID", "program_id")

    # Placeholder member of the dimension: key -1, "N/A" name and category,
    # and excluded from the dashboard.
    placeholder_rows = [(-1, "N/A", "N/A", False)]
    placeholder_columns = [
        "program_id",
        "program_name",
        "category",
        "include_in_dashboard",
    ]
    placeholder = spark.createDataFrame(placeholder_rows, placeholder_columns)

    # Columns are matched by name, not by position.
    dimension = per_program.unionByName(placeholder)

    # Replace the existing contents of the target Delta table.
    writer = dimension.write.format("delta").mode("overwrite")
    writer.saveAsTable(target_table)

    return dimension