In [0]:
from pyspark.sql.functions import when, coalesce, to_date, substring, initcap, trim, col, lit, lower, current_timestamp, concat, upper, to_timestamp
from pyspark.sql.types import LongType, StringType, DateType, TimestampType
from datetime import datetime

In [0]:
def parse_fecha_devolucion_safe(fecha_col: str):
    """Build a Column expression that parses a string column into a timestamp.

    The input is trimmed and matched against a set of known layouts. Each
    layout is guarded by a regex so a format is only attempted on values with
    the right shape; the first attempt that yields a non-null timestamp wins.

    For the ambiguous two-digit layouts both day/month orders are tried, in a
    fixed priority:

    * slash-separated: ``dd/MM/yyyy`` first, then ``MM/dd/yyyy``
    * dash-separated:  ``MM-dd-yyyy`` first, then ``dd-MM-yyyy``

    The same priority is applied with and without a ``HH:mm:ss`` time part, so
    a value such as ``25-12-2023 10:00:00`` is handled the same way as
    ``25-12-2023``.

    Args:
        fecha_col: Name of the string column holding the raw date text.

    Returns:
        A Column of timestamp type; NULL when no layout matches or no format
        produces a value.
    """
    # NOTE(review): the fallback between formats relies on to_timestamp
    # returning NULL for unparseable text. With spark.sql.ansi.enabled=true it
    # is documented to raise instead -- confirm the session setting.
    col_trimmed = trim(col(fecha_col))

    # (shape regex, formats to try in priority order). The regexes are mutually
    # exclusive, so only the order of formats within one layout matters.
    layouts = (
        (r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$", ("yyyy-MM-dd HH:mm:ss",)),
        (r"^\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}$", ("dd/MM/yyyy HH:mm:ss", "MM/dd/yyyy HH:mm:ss")),
        (r"^\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}$", ("MM-dd-yyyy HH:mm:ss", "dd-MM-yyyy HH:mm:ss")),
        (r"^\d{4}/\d{2}/\d{2} \d{2}:\d{2}:\d{2}$", ("yyyy/MM/dd HH:mm:ss",)),
        (r"^\d{4}-\d{2}-\d{2}$", ("yyyy-MM-dd",)),
        (r"^\d{2}/\d{2}/\d{4}$", ("dd/MM/yyyy", "MM/dd/yyyy")),
        (r"^\d{2}-\d{2}-\d{4}$", ("MM-dd-yyyy", "dd-MM-yyyy")),
    )

    candidates = [
        when(col_trimmed.rlike(pattern), to_timestamp(col_trimmed, fmt))
        for pattern, formats in layouts
        for fmt in formats
    ]

    # The typed NULL at the end keeps the result type a timestamp.
    return coalesce(*candidates, lit(None).cast(TimestampType()))

In [0]:
catalog_name = "webinar"
# Driver-side "today"; upper bound for a plausible return date.
fecha_actual = datetime.now().date()

# Lower bound for a plausible return date, cast explicitly so the comparison
# below is timestamp-to-timestamp rather than timestamp-to-string.
fecha_minima = lit("2000-01-01").cast(TimestampType())

# Normalise the free-text reason: NULL, "N/A" and blank become NULL; anything
# else is trimmed and lower-cased.
motivo_recortado = trim(col("motivo"))
motivo_normalizado = (
    when(col("motivo").isNull(), None)
    .when(motivo_recortado == "N/A", None)
    .when(motivo_recortado == "", None)
    .otherwise(lower(motivo_recortado))
)

# Sentence-case the reason: upper-case the first character, keep the rest.
# NOTE(review): the tail is limited to 1000 characters, so a reason longer
# than 1001 characters is truncated -- confirm this is acceptable.
motivo_capitalizado = when(motivo_normalizado.isNull(), None).otherwise(
    concat(
        upper(substring(motivo_normalizado, 1, 1)),
        substring(motivo_normalizado, 2, 1000),
    )
)

df = (
    spark.table(f"{catalog_name}.bronze.devoluciones")
    .withColumn("motivo", motivo_capitalizado)
    .withColumn("fecha_devolucion", parse_fecha_devolucion_safe("fecha_devolucion"))
    .withColumn(
        "fecha_devolucion",
        when(
            # Compare at date granularity. Comparing the timestamp directly
            # with a date promotes the date to midnight, which would null out
            # every return stamped later than 00:00:00 today.
            (to_date(col("fecha_devolucion")) > lit(fecha_actual))
            | (col("fecha_devolucion") < fecha_minima),
            None,
        ).otherwise(col("fecha_devolucion")),
    )
    # NOTE(review): dropDuplicates has no ordering, so which row survives for
    # a repeated id_devolucion is arbitrary -- confirm that is intended.
    .dropDuplicates(["id_devolucion"])
    .withColumn("updated_at", current_timestamp())
    .select(
        col("id_devolucion").cast(LongType()),
        col("id_venta").cast(LongType()),
        col("motivo").cast(StringType()),
        col("fecha_devolucion").cast(TimestampType()),
        col("updated_at"),
    )
)

# Full refresh of the silver table on every run.
df.write.format("delta").mode("overwrite").saveAsTable(f"{catalog_name}.silver.devoluciones")