In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, to_timestamp, regexp_replace, when
from pyspark.sql.types import LongType

def clean_program_completions(catalog_name):
    """Load the program completions CSV, fix column types, and save it as a Delta table.

    The CSV is read (with a header row) from the ``raw_data`` volume of
    ``catalog_name`` and the result overwrites the table
    ``<catalog_name>.cleaned_data.analytics_program_completions``.

    Column conversions, applied in this order:

    * "Time assigned", "Time started", "Time completed", "Time due":
      commas are stripped, then the text is parsed as a timestamp with the
      pattern ``dd/MM/yyyy HH:mm:ss``.
    * "Include in dashboard": True where the raw value equals the string
      "1", False otherwise.
    * "User ID", "Program ID": cast to ``LongType``.

    Args:
        catalog_name: Catalog name, used in both the source volume path and
            the destination table name.

    Returns:
        None. Any exception raised while reading, converting, previewing or
        writing is caught and reported with ``print`` rather than propagated.
    """
    session = SparkSession.builder.getOrCreate()

    source_csv = f"/Volumes/{catalog_name}/raw_data/{catalog_name}_data/kineo_analytics___program_completions_report_report.csv"
    target_table = f"{catalog_name}.cleaned_data.analytics_program_completions"

    timestamp_columns = ("Time assigned", "Time started", "Time completed", "Time due")
    flag_columns = ("Include in dashboard",)
    long_columns = ("User ID", "Program ID")

    def as_timestamp(name):
        # Commas are removed before parsing so the value matches the pattern.
        without_commas = regexp_replace(col(name), ",", "")
        return to_timestamp(without_commas, "dd/MM/yyyy HH:mm:ss")

    def as_flag(name):
        # Only the literal string "1" maps to True.
        return when(col(name) == "1", True).otherwise(False)

    def as_long(name):
        return col(name).cast(LongType())

    # Each entry pairs a group of columns with the expression builder for it;
    # groups are processed top to bottom.
    conversion_plan = (
        (timestamp_columns, as_timestamp),
        (flag_columns, as_flag),
        (long_columns, as_long),
    )

    try:
        reader = session.read.format("csv").option("header", "true")
        frame = reader.load(source_csv)

        for names, build_expression in conversion_plan:
            for name in names:
                frame = frame.withColumn(name, build_expression(name))

        # `display` is not defined or imported in this block; presumably it is
        # the notebook-provided preview helper -- confirm before running
        # outside a notebook.
        display(frame)

        # Overwrite the destination Delta table, passing the column-mapping
        # option through the writer.
        writer = (
            frame.write.format("delta")
            .option("delta.columnMapping.mode", "name")
            .mode("overwrite")
        )
        writer.saveAsTable(target_table)

        print(f"Successfully processed {source_csv} -> {target_table}")
    except Exception as err:
        print(f"Data cleaning failed: {err}")