In [0]:
from pyspark.sql import functions as F

# ============================================================
# CONFIGURATION
# ============================================================
# Table identifiers are dot-separated: <CATALOGO>.<schema>.<table>.
CATALOGO = "santig_120781"
SILVER_TABLE = ".".join((CATALOGO, "silver", "ecommerce_sales"))
GOLD_TABLE = ".".join((CATALOGO, "gold", "ecommerce_report"))

print("Iniciando construccion de capa Gold Ecommerce...")

# ============================================================
# READ FROM SILVER
# ============================================================
# Load the silver table as a DataFrame; every later step in this
# script derives from it.
df_silver = spark.table(SILVER_TABLE)

# ============================================================
# GOLD TABLE CONSTRUCTION
# ============================================================

# 1. Discount classification
# Adds "discount_level" by bucketing the "Discount" column.
# Every bucket has an explicit condition and there is no .otherwise():
# a NULL Discount never satisfies a comparison, so it now yields a NULL
# label instead of being reported as "Descuento alto".
# NOTE(review): the 0.1 / 0.3 thresholds presumably expect Discount as a
# fraction (0-1), not a percentage - confirm against the silver schema.
discount_col = F.col("Discount")
df_gold = df_silver.withColumn(
    "discount_level",
    F.when(discount_col == 0, "Sin descuento")
     .when(discount_col < 0.1, "Descuento bajo")
     .when(discount_col < 0.3, "Descuento medio")
     .when(discount_col >= 0.3, "Descuento alto"),
)

# 2. Marketing ROI classification
# marketing_roi = net_revenue / Marketing_Spend, rounded to 2 decimals.
# The division is guarded so a zero Marketing_Spend produces a NULL ROI
# instead of a divide-by-zero error when ANSI mode is enabled.
# roi_category spells out every bucket and has no .otherwise(), so rows
# whose ROI is undefined (NULL) get a NULL category rather than being
# counted as "ROI Bajo".
spend_col = F.col("Marketing_Spend")
roi_col = F.col("marketing_roi")
df_gold = df_gold.withColumn(
    "marketing_roi",
    F.when(spend_col != 0, F.round(F.col("net_revenue") / spend_col, 2)),
).withColumn(
    "roi_category",
    F.when(roi_col >= 3, "ROI Alto")
     .when(roi_col >= 1, "ROI Medio")
     .when(roi_col < 1, "ROI Bajo"),
)

# 3. Sales volume classification
# Buckets "Units_Sold" into high (>= 80), medium (>= 40) and low (< 40).
# The low bucket has its own condition and there is no .otherwise(), so a
# NULL Units_Sold produces a NULL category instead of "Volumen Bajo".
units_col = F.col("Units_Sold")
df_gold = df_gold.withColumn(
    "sales_volume_category",
    F.when(units_col >= 80, "Volumen Alto")
     .when(units_col >= 40, "Volumen Medio")
     .when(units_col < 40, "Volumen Bajo"),
)

# ============================================================
# FINAL COLUMN SELECTION FOR POWER BI
# ============================================================
# Columns exposed in the gold report, in output order.
report_columns = [
    "Date",
    "year",
    "month",
    "month_name",
    "Product_Category",
    "Customer_Segment",
    "Price",
    "Discount",
    "discount_amount",
    "discount_level",
    "Units_Sold",
    "sales_volume_category",
    "Marketing_Spend",
    "marketing_roi",
    "roi_category",
    "total_revenue",
    "net_revenue",
]

# Load timestamp appended as the last column of the report.
process_date = F.current_timestamp().alias("_process_date")

# NOTE(review): dropDuplicates() compares all selected columns and none of
# them is a transaction identifier, so two genuinely identical sales would
# be merged into one row - confirm this is intended.
df_final = df_gold.select(*report_columns, process_date).dropDuplicates()

# ============================================================
# WRITE TO GOLD
# ============================================================
# Replace the gold table (data and schema) with the current report.
(
    df_final.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(GOLD_TABLE)
)

# OPTIMIZATION
spark.sql(f"OPTIMIZE {GOLD_TABLE} ZORDER BY (Date, Product_Category)")

# Count the rows actually persisted. df_final is lazy, so calling
# df_final.count() would re-run the whole pipeline from silver and report
# the recomputed figure rather than what is stored in the table.
gold_row_count = spark.read.table(GOLD_TABLE).count()
print(f"OK: Tabla {GOLD_TABLE} creada con {gold_row_count} registros.")

# ============================================================
# VIEW FOR POWER BI
# ============================================================
# The view exposes the gold report columns (without _process_date) plus a
# constant 1 per row named cantidad_transacciones.
dashboard_view = f"{CATALOGO}.gold.vw_dashboard_ecommerce"
create_view_sql = f"""
CREATE OR REPLACE VIEW {dashboard_view} AS
SELECT
    Date,
    year,
    month,
    month_name,
    Product_Category,
    Customer_Segment,
    Price,
    Discount,
    discount_amount,
    discount_level,
    Units_Sold,
    sales_volume_category,
    Marketing_Spend,
    marketing_roi,
    roi_category,
    total_revenue,
    net_revenue,
    1 as cantidad_transacciones
FROM {GOLD_TABLE}
"""
spark.sql(create_view_sql)

print("OK: Vista vw_dashboard_ecommerce creada. Lista para importar en Power BI.")

# ============================================================
# FINAL AUDIT
# ============================================================
# Audit the persisted gold table rather than df_final: df_final is lazy,
# so each .show() on it would re-run the whole pipeline from silver and
# describe a recomputation instead of the stored data.
df_audit = spark.read.table(GOLD_TABLE)

# NOTE(review): the captured notebook output shows negative ingresos_netos
# for every category and negative average ROI for every segment - verify
# how net_revenue is computed upstream.
print("\nDistribucion por Categoria:")
(
    df_audit.groupBy("Product_Category")
    .agg(
        F.count("*").alias("transacciones"),
        F.round(F.sum("net_revenue"), 2).alias("ingresos_netos"),
    )
    .orderBy(F.col("ingresos_netos").desc())
    .show()
)

print("\nDistribucion por Segmento de Cliente:")
(
    df_audit.groupBy("Customer_Segment")
    .agg(
        F.count("*").alias("transacciones"),
        F.round(F.avg("marketing_roi"), 2).alias("roi_promedio"),
    )
    .show()
)

Iniciando construccion de capa Gold Ecommerce...
OK: Tabla santig_120781.gold.ecommerce_report creada con 1000 registros.
OK: Vista vw_dashboard_ecommerce creada. Lista para importar en Power BI.

Distribucion por Categoria:
+----------------+-------------+--------------+
|Product_Category|transacciones|ingresos_netos|
+----------------+-------------+--------------+
|      Home Decor|          190|-5.595600923E7|
|     Electronics|          210|-7.264407569E7|
|         Fashion|          190| -7.49813399E7|
|          Sports|          206|-7.658386683E7|
|            Toys|          204|-8.125132253E7|
+----------------+-------------+--------------+


Distribucion por Segmento de Cliente:
+----------------+-------------+------------+
|Customer_Segment|transacciones|roi_promedio|
+----------------+-------------+------------+
|         Premium|          316|     -177.37|
|         Regular|          345|     -148.76|
|      Occasional|          339|     -191.97|
+----------------+---------