In [0]:
# Reset widget state: remove every widget currently defined on the notebook.
# NOTE(review): Databricks documents that a widget cannot be created in the
# same cell that removes one; removeAll() and the text() calls below appear to
# share a cell here -- confirm, and split them into separate cells if so.
dbutils.widgets.removeAll()

# Databricks notebook source
# =========================
# Widgets
# =========================
# Text widgets as (name, default value) pairs, created in this order.
_widget_defaults = (
    ("catalogo", "catalog_final_project"),
    ("esquema_source", "silver"),
    ("esquema_sink", "gold"),
)
for _widget_name, _widget_default in _widget_defaults:
    dbutils.widgets.text(_widget_name, _widget_default)

# COMMAND ----------

from pyspark.sql import functions as F

# Read the three widget values: catalog, source schema and sink schema.
catalogo, esq_src, esq_sink = (
    dbutils.widgets.get(_widget)
    for _widget in ("catalogo", "esquema_source", "esquema_sink")
)

# Three-part table names: <catalog>.<schema>.<table>.
tabla_silver = ".".join((catalogo, esq_src, "plant_operational_data"))
tabla_daily = ".".join((catalogo, esq_sink, "daily_kpis"))
tabla_sum = ".".join((catalogo, esq_sink, "plant_performance_summary"))

# Echo the resolved source and sink tables.
for _label, _table in (
    ("SILVER (source):", tabla_silver),
    ("GOLD daily (sink):", tabla_daily),
    ("GOLD summary (sink):", tabla_sum),
):
    print(_label, _table)

# COMMAND ----------

# Load the silver source table.
df = spark.table(tabla_silver)

# Hours represented by one reading: 15-minute interval -> 0.25 h.
interval_h = F.lit(0.25)

# Reusable column expressions for the daily aggregation.
_in_outage = F.col("outage_flag") == 1
_generated_mwh = F.col("actual_generation_mw") * interval_h
# Gap between installed capacity and actual generation over one interval.
_shortfall_mwh = (
    F.col("installed_capacity_mw") - F.col("actual_generation_mw")
) * interval_h
_zero = F.lit(0.0)

# One output row per (plant_id, plant_name, date).
_daily_aggregates = [
    F.sum(_generated_mwh).alias("energy_mwh"),
    F.avg("capacity_factor").alias("avg_capacity_factor"),
    # Outage readings contribute their interval length; all others contribute 0.
    F.sum(F.when(_in_outage, interval_h).otherwise(_zero)).alias("outage_hours"),
    # The shortfall is only counted as lost energy while outage_flag == 1.
    F.sum(F.when(_in_outage, _shortfall_mwh).otherwise(_zero)).alias("lost_energy_mwh"),
    F.avg("turbine_efficiency").alias("avg_turbine_efficiency"),
    F.avg("inflow_m3s").alias("avg_inflow_m3s"),
]

_daily_grouped = (
    df
    .withColumn("date", F.to_date("datetime"))
    .groupBy("plant_id", "plant_name", "date")
)
df_daily = _daily_grouped.agg(*_daily_aggregates)

# Simple demo assumption: lost energy is valued at a flat 50.0 per MWh (the
# column name implies USD). The original note said this can be removed if
# unwanted; the summary aggregation later in this file sums
# loss_of_profit_usd, so it would have to be removed there as well.
df_daily = df_daily.withColumn(
    "loss_of_profit_usd", F.col("lost_energy_mwh") * F.lit(50.0)
)
# Stamp each row with the time the gold data was produced.
df_daily = df_daily.withColumn("_gold_ts", F.current_timestamp())


# COMMAND ----------

# Persist the daily KPIs as a Delta table, replacing any existing contents and
# partitioning the stored data by plant_id.
_daily_writer = (
    df_daily.write.format("delta").mode("overwrite").partitionBy("plant_id")
)
_daily_writer.saveAsTable(tabla_daily)

# Build the per-plant summary from the daily KPI table that was just written,
# not from the lazy df_daily plan. Reusing df_daily would make this second
# action re-read and re-aggregate the whole silver table, and the summary
# could drift from the persisted daily rows if silver changes between writes.
df_summary = (
    spark.table(tabla_daily)
    .groupBy("plant_id", "plant_name")
    .agg(
        F.sum("energy_mwh").alias("energy_mwh_total"),
        # Unweighted mean of the daily means: a day with fewer readings counts
        # the same as a full day.
        F.avg("avg_capacity_factor").alias("avg_capacity_factor_overall"),
        F.sum("outage_hours").alias("outage_hours_total"),
        F.sum("lost_energy_mwh").alias("lost_energy_mwh_total"),
        F.sum("loss_of_profit_usd").alias("loss_of_profit_usd_total")
    )
    # Stamp each summary row with the time it was produced.
    .withColumn("_gold_ts", F.current_timestamp())
)

# Persist the per-plant summary as a Delta table (no partitioning), replacing
# any existing contents.
_summary_writer = df_summary.write.format("delta").mode("overwrite")
_summary_writer.saveAsTable(tabla_sum)

# Report both sink tables once the writes have returned.
for _written_table in (tabla_daily, tabla_sum):
    print(f"OK: {_written_table}")