In [0]:
from pyspark.sql.functions import col, round, sum, countDistinct, avg, to_date

# Load the Delta table stored under the `silver` path and derive the per-row
# sale amount in one chained expression, so `df_silver` is assigned only once.
# NOTE: `round` here is pyspark.sql.functions.round (imported at the top of
# this cell), which shadows the Python builtin of the same name.
df_silver = (
    spark.read
    .format("delta")
    .load("/Volumes/workspace/default/retail_lakehouse/silver")
    .withColumn("TotalVenta", round(col("Quantity") * col("UnitPrice"), 2))
)

# KPI: total sales per country, largest total first.
# `sum` is pyspark.sql.functions.sum (imported at the top of this cell),
# not the Python builtin.
df_kpi_pais = (
    df_silver
    .groupBy("Country")
    .agg(sum("TotalVenta").alias("VentasTotales"))
    .sort(col("VentasTotales").desc())
)

# Persist the KPI as a Delta table, replacing any previous contents.
(
    df_kpi_pais.write
    .format("delta")
    .mode("overwrite")
    .save("/Volumes/workspace/default/retail_lakehouse/gold/kpi_ventas_pais")
)

# KPI: total amount sold per product description, best sellers first.
# `sum` is pyspark.sql.functions.sum (imported at the top of this cell),
# not the Python builtin.
total_vendido = sum("TotalVenta").alias("TotalVendido")
df_kpi_productos = (
    df_silver
    .groupBy("Description")
    .agg(total_vendido)
    .orderBy(col("TotalVendido").desc())
)

# Persist the KPI as a Delta table, replacing any previous contents.
(
    df_kpi_productos.write
    .format("delta")
    .mode("overwrite")
    .save("/Volumes/workspace/default/retail_lakehouse/gold/kpi_productos_top")
)

# KPI: total sales per calendar day, in chronological order.
# The invoice timestamp is truncated to a date directly inside the grouping
# expression; the alias gives the output column the name "Fecha".
df_kpi_dia = (
    df_silver
    .groupBy(to_date(col("InvoiceDate")).alias("Fecha"))
    .agg(sum("TotalVenta").alias("VentasTotales"))
    .sort(col("Fecha").asc())
)

# Persist the KPI as a Delta table, replacing any previous contents.
(
    df_kpi_dia.write
    .format("delta")
    .mode("overwrite")
    .save("/Volumes/workspace/default/retail_lakehouse/gold/kpi_ventas_dia")
)

# KPI: total amount spent per customer, biggest spenders first.
#
# groupBy treats a null key as its own group, so any rows without a
# CustomerID would be summed into a single "null customer" that can end up
# at the top of the ranking. Those rows cannot be attributed to a customer,
# so they are excluded before aggregating. If the silver table already
# guarantees a non-null CustomerID this filter is a no-op.
# `sum` is pyspark.sql.functions.sum (imported at the top of this cell),
# not the Python builtin.
df_kpi_clientes = (
    df_silver
    .filter(col("CustomerID").isNotNull())
    .groupBy("CustomerID")
    .agg(sum("TotalVenta").alias("TotalGastado"))
    .orderBy("TotalGastado", ascending=False)
)

# Persist the KPI as a Delta table, replacing any previous contents.
(
    df_kpi_clientes.write
    .format("delta")
    .mode("overwrite")
    .save("/Volumes/workspace/default/retail_lakehouse/gold/kpi_clientes_top")
)

# KPI: average invoice amount ("ticket promedio"), computed in two steps.
# `sum` and `round` are the pyspark.sql.functions versions imported at the
# top of this cell, not the Python builtins.

# Step 1: one row per invoice with its total amount.
monto_por_factura = (
    df_silver
    .groupBy("InvoiceNo")
    .agg(sum("TotalVenta").alias("MontoFactura"))
)

# Step 2: collapse to a single row holding the mean invoice amount,
# rounded to 2 decimals.
df_kpi_ticket = monto_por_factura.agg(
    round(avg("MontoFactura"), 2).alias("TicketPromedio")
)

# Persist the KPI as a Delta table, replacing any previous contents.
(
    df_kpi_ticket.write
    .format("delta")
    .mode("overwrite")
    .save("/Volumes/workspace/default/retail_lakehouse/gold/kpi_ticket_promedio")
)

In [0]:
# Expose each gold KPI DataFrame as a temporary view so it can be queried
# by name from SQL cells. Existing views with the same name are replaced.
for view_name, kpi_df in (
    ("gold_ventas_por_pais", df_kpi_pais),
    ("gold_productos_top", df_kpi_productos),
    ("gold_ventas_diarias", df_kpi_dia),
    ("gold_clientes_top", df_kpi_clientes),
    ("gold_ticket_promedio", df_kpi_ticket),
):
    kpi_df.createOrReplaceTempView(view_name)

In [0]:
%sql
-- Preview two of the temp views registered in the previous cell.
-- NOTE(review): this cell holds two statements; confirm the notebook renders
-- both result sets (some runtimes display only the last statement's output).
SELECT * FROM gold_productos_top;
SELECT * FROM gold_ventas_diarias