In [0]:
from pyspark.sql.functions import when, coalesce, to_date, substring, initcap, trim, col, lit, lower, current_timestamp, concat, upper, to_timestamp, expr, round
from pyspark.sql.types import LongType, StringType, DateType, TimestampType, DecimalType

In [0]:
# Unity Catalog / metastore catalog that holds both the bronze and silver schemas.
catalog_name = "webinar"

# Build the silver `productos` dataset from the bronze table.
#
# Cleaning rules applied, in order:
#   1. `id_producto` is cast to BIGINT and rows are de-duplicated on the
#      *cast* key.
#   2. `nombre` and `categoria` are trimmed and converted to Initial Caps
#      (NULL stays NULL, since `trim` / `initcap` propagate NULL).
#   3. `precio` is parsed straight into DECIMAL(18, 2); anything that cannot be
#      parsed, or that is not strictly positive after rounding to cents,
#      becomes NULL.
#   4. `updated_at` records when this transformation ran.
df = (
    spark.table(f"{catalog_name}.bronze.productos")
    # Cast the key BEFORE dropping duplicates. If bronze stores the id as text,
    # values such as "1", " 1" and "01" are distinct strings but the same
    # BIGINT; de-duplicating on the raw value would let duplicate keys reach
    # the silver table.
    .withColumn("id_producto", col("id_producto").cast(LongType()))
    # NOTE(review): dropDuplicates keeps an arbitrary row per key. If bronze
    # has a load timestamp or version column, prefer an explicit
    # "latest row wins" window instead -- confirm against the bronze schema.
    .dropDuplicates(["id_producto"])
    .withColumn("nombre", initcap(trim(col("nombre"))))
    .withColumn("categoria", initcap(trim(col("categoria"))))
    # Parse directly into the target decimal type instead of going through
    # DOUBLE: this avoids binary floating-point precision loss, and try_cast
    # yields NULL (rather than an error) for unparsable or out-of-range input.
    .withColumn("precio", expr("try_cast(precio as DECIMAL(18, 2))"))
    # Validate the value that will actually be stored: a positive sub-cent
    # input (e.g. 0.004) rounds to 0.00 and must be rejected as well.
    # `when` without `otherwise` yields NULL for both NULL and non-positive prices.
    .withColumn("precio", when(col("precio") > 0, col("precio")))
    .withColumn("updated_at", current_timestamp())
    # Every column already has its final type at this point
    # (BIGINT, STRING, STRING, DECIMAL(18, 2), TIMESTAMP).
    .select("id_producto", "nombre", "categoria", "precio", "updated_at")
)

# Full refresh: replace the contents of the silver table with the cleaned snapshot.
df.write.format("delta").mode("overwrite").saveAsTable(f"{catalog_name}.silver.productos")