# Golden Layer — KPIs Agregados para Power BI
**Fuente:** `catalog_termoplanta.silver.historian_kpis`  
**Destino:**  
- `catalog_termoplanta.golden.kpi_semanal` — resumen semanal por condición operativa  
- `catalog_termoplanta.golden.kpi_diario` — resumen diario para tendencias  
**Descripción:** Agrega KPIs filtrados (sin outliers) para consumo directo en Power BI.

In [0]:
%python
CATALOG = "adbsmartdatajpc"

from pyspark.sql import functions as F

# Leer silver — solo registros sin outliers y con condición operativa definida
df_silver = (
    spark.table(f"{CATALOG}.silver.historian_kpis")
    .filter(~F.col("es_outlier"))
    .filter(F.col("condicion_operativa") != "Otro")
)

print(f"Registros silver (sin outliers): {df_silver.count():,}")

Registros silver (sin outliers): 63,798


In [0]:
%python
# ============================================================
# TABLA GOLDEN 1: KPI SEMANAL POR CONDICIÓN OPERATIVA
# Replica la lógica del reporte() del notebook original
# ============================================================
kpi_cols = {
    "HR":         "heat_rate_neto",
    "HR_bruto":   "heat_rate_bruto",
    "n_TG11":     "eficiencia_tg11",
    "n_TG12":     "eficiencia_tg12",
    "n_HRSG11":   "eficiencia_hrsg11",
    "n_HRSG12":   "eficiencia_hrsg12",
    "n_Box1":     "efectividad_box1",
    "n_Box2":     "efectividad_box2",
    "n_c_TG11":   "eficiencia_comp_tg11",
    "n_c_TG12":   "eficiencia_comp_tg12",
    "potencia_total_mw": "potencia_total_mw"
}

# Agregaciones: mean, min, max, stddev por semana y condición
agg_exprs = []
for src, dst in kpi_cols.items():
    agg_exprs += [
        F.avg(F.col(src)).alias(f"{dst}_mean"),
        F.min(F.col(src)).alias(f"{dst}_min"),
        F.max(F.col(src)).alias(f"{dst}_max"),
        F.stddev(F.col(src)).alias(f"{dst}_std"),
    ]

df_golden_semanal = (
    df_silver
    .groupBy("year", "semana", "condicion_operativa")
    .agg(
        F.min("fecha").alias("fecha_inicio"),
        F.max("fecha").alias("fecha_fin"),
        F.count("*").alias("n_registros"),
        *agg_exprs
    )
    .withColumn("calculation_ts", F.current_timestamp())
    .orderBy("year", "semana", "condicion_operativa")
)

print(f"Registros en KPI semanal: {df_golden_semanal.count():,}")
df_golden_semanal.show(5)

Registros en KPI semanal: 120
+----+------+-------------------+------------+----------+-----------+-------------------+------------------+------------------+------------------+--------------------+-------------------+-------------------+-------------------+--------------------+-------------------+-------------------+--------------------+--------------------+-------------------+-------------------+-------------------+----------------------+---------------------+---------------------+---------------------+----------------------+---------------------+---------------------+---------------------+---------------------+--------------------+--------------------+--------------------+---------------------+--------------------+--------------------+--------------------+-------------------------+------------------------+------------------------+------------------------+-------------------------+------------------------+------------------------+------------------------+----------------------+-------

In [0]:
%python
# ============================================================
# TABLA GOLDEN 2: KPI DIARIO — Para tendencias en Power BI
# ============================================================
df_golden_diario = (
    df_silver
    .groupBy("fecha", "condicion_operativa")
    .agg(
        F.avg("HR").alias("heat_rate_neto_mean"),
        F.avg("HR_bruto").alias("heat_rate_bruto_mean"),
        F.avg("n_TG11").alias("eficiencia_tg11_mean"),
        F.avg("n_TG12").alias("eficiencia_tg12_mean"),
        F.avg("n_HRSG11").alias("eficiencia_hrsg11_mean"),
        F.avg("n_HRSG12").alias("eficiencia_hrsg12_mean"),
        F.avg("n_Box1").alias("efectividad_box1_mean"),
        F.avg("n_Box2").alias("efectividad_box2_mean"),
        F.avg("n_c_TG11").alias("eficiencia_comp_tg11_mean"),
        F.avg("n_c_TG12").alias("eficiencia_comp_tg12_mean"),
        F.avg("potencia_total_mw").alias("potencia_media_mw"),
        F.max("potencia_total_mw").alias("potencia_max_mw"),
        F.min("potencia_total_mw").alias("potencia_min_mw"),
        F.sum(
            F.col("potencia_total_mw") * (5.0/60.0)  # MWh cada 5 min
        ).alias("energia_mwh"),
        F.count("*").alias("n_registros")
    )
    .withColumn("calculation_ts", F.current_timestamp())
    .orderBy("fecha", "condicion_operativa")
)

print(f"Registros en KPI diario: {df_golden_diario.count():,}")
df_golden_diario.show(5)

Registros en KPI diario: 498
+----------+-------------------+-------------------+--------------------+--------------------+--------------------+----------------------+----------------------+---------------------+---------------------+-------------------------+-------------------------+------------------+------------------+-----------------+------------------+-----------+--------------------+
|     fecha|condicion_operativa|heat_rate_neto_mean|heat_rate_bruto_mean|eficiencia_tg11_mean|eficiencia_tg12_mean|eficiencia_hrsg11_mean|eficiencia_hrsg12_mean|efectividad_box1_mean|efectividad_box2_mean|eficiencia_comp_tg11_mean|eficiencia_comp_tg12_mean| potencia_media_mw|   potencia_max_mw|  potencia_min_mw|       energia_mwh|n_registros|      calculation_ts|
+----------+-------------------+-------------------+--------------------+--------------------+--------------------+----------------------+----------------------+---------------------+---------------------+-------------------------+--------

In [0]:
%python
# ============================================================
# ESCRITURA GOLDEN — Ambas tablas en Delta
# ============================================================

# Tabla semanal
(
    df_golden_semanal
    .write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .partitionBy("year", "condicion_operativa")
    .saveAsTable(f"{CATALOG}.golden.kpi_semanal")
)
print(f"✅ Escrita: {CATALOG}.golden.kpi_semanal")

# Tabla diaria
(
    df_golden_diario
    .write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{CATALOG}.golden.kpi_diario")
)
print(f"✅ Escrita: {CATALOG}.golden.kpi_diario")

✅ Escrita: adbsmartdatajpc.golden.kpi_semanal
✅ Escrita: adbsmartdatajpc.golden.kpi_diario


In [0]:
%python
# ============================================================
# VERIFICACIÓN FINAL — Vista previa para Power BI
# ============================================================
print("=== KPI Semanal — Carga Base CC2x1 ===")
spark.table(f"{CATALOG}.golden.kpi_semanal") \
    .filter(F.col("condicion_operativa") == "Carga_Base_CC2x1") \
    .select("semana", "fecha_inicio", "fecha_fin",
            "heat_rate_neto_mean", "eficiencia_tg11_mean",
            "eficiencia_tg12_mean", "potencia_total_mw_mean") \
    .orderBy("semana") \
    .show(10)

print("=== KPI Diario — Últimos 7 días ===")
spark.table(f"{CATALOG}.golden.kpi_diario") \
    .orderBy(F.col("fecha").desc()) \
    .show(7)

=== KPI Semanal — Carga Base CC2x1 ===
+------+------------+----------+-------------------+--------------------+--------------------+----------------------+
|semana|fecha_inicio| fecha_fin|heat_rate_neto_mean|eficiencia_tg11_mean|eficiencia_tg12_mean|potencia_total_mw_mean|
+------+------------+----------+-------------------+--------------------+--------------------+----------------------+
|     1|  2025-01-01|2025-01-04|  6869.145133974254|   37.14765927984575|   37.20969485647648|     536.8315070175439|
|     2|  2025-01-06|2025-01-12| 6862.8433054452435|   37.20310445563478|  37.307279185921516|     544.0693107611547|
|     3|  2025-01-13|2025-01-15| 6858.1774948893135|   37.20812654230359|  37.296234681584586|     543.7387881235153|
|     4|  2025-01-20|2025-01-24|  6846.711175439238|   37.26972962097896|   37.30421300890405|     540.3745433988761|
|     5|  2025-01-27|2025-02-01|  6853.341883749352|  37.198852766378714|  37.265273178139005|            540.156126|
|     6|  2025-02