# Bronze Layer — Ingesta de Datos del Historiador


In [0]:
%python
# Remove every widget currently defined on the notebook.
dbutils.widgets.removeAll()

In [0]:
%python
# Notebook parameters: widget name -> default value shown in the widget bar.
WIDGET_DEFAULTS = {
    "storage_account": "adlssmartdatajpc",
    "catalogo":        "catalog_termoplanta",
    "source_file":     "BD_EXAQUAUNTUM.csv",
    "account_key":     "",
}
for widget_name, widget_default in WIDGET_DEFAULTS.items():
    dbutils.widgets.text(widget_name, widget_default)

# Read the current widget values into the constants used by the cells below.
STORAGE     = dbutils.widgets.get("storage_account")
CATALOG     = dbutils.widgets.get("catalogo")
SOURCE_FILE = dbutils.widgets.get("source_file")
ACCOUNT_KEY = dbutils.widgets.get("account_key")

# Root URLs of the raw and bronze containers in the storage account.
RAW_PATH    = f"abfss://raw@{STORAGE}.dfs.core.windows.net/"
BRONZE_PATH = f"abfss://bronze@{STORAGE}.dfs.core.windows.net/"

# Echo the effective configuration (the account key is deliberately not printed).
for config_line in (
    f"Storage      : {STORAGE}",
    f"Catálogo     : {CATALOG}",
    f"Archivo fuente: {SOURCE_FILE}",
    f"RAW_PATH     : {RAW_PATH}",
):
    print(config_line)

Storage      : adlssmartdatajpc
Catálogo     : catalog_termoplanta
Archivo fuente: BD_EXAQUAUNTUM.csv
RAW_PATH     : abfss://raw@adlssmartdatajpc.dfs.core.windows.net/


In [0]:
%python
# ============================================================
# STORAGE ACCESS CONFIGURATION
# TODO: replace with Managed Identity once the flow is finished
# ============================================================
# The account_key widget defaults to "". Registering an empty key cannot
# grant access, and reporting success in that case hides the real cause of
# a later read failure. The key is therefore only registered when one was
# actually supplied; otherwise a warning is printed instead.
# NOTE(review): the key arrives through a plain-text widget; consider
# fetching it with dbutils.secrets.get(...) until Managed Identity lands.
account_key = ACCOUNT_KEY.strip()  # tolerate whitespace from copy/paste

if account_key:
    spark.conf.set(
        f"fs.azure.account.key.{STORAGE}.dfs.core.windows.net",
        account_key,
    )
    print("Acceso al storage configurado")
else:
    print(
        "ADVERTENCIA: el widget 'account_key' está vacío; "
        "no se configuró la clave de la cuenta de storage"
    )

Acceso al storage configurado


---
## RAW — Lectura desde el contenedor raw

In [0]:
%python
from pyspark.sql import functions as F

# Raw read of the source CSV, with no transformations applied.
raw_file_path = f"{RAW_PATH}{SOURCE_FILE}"

df_raw = (
    spark.read
    .options(
        header="true",        # first line holds the column names
        sep=";",              # semicolon-delimited file
        encoding="UTF-8",
        inferSchema="true",   # let Spark derive the column types from the data
    )
    .csv(raw_file_path)
)

# Quick size and schema summary of what was read.
print(f"Registros raw leidos: {df_raw.count():,}")
print(f"Columnas            : {len(df_raw.columns)}")
df_raw.printSchema()

Registros raw leidos: 96,327
Columnas            : 24
root
 |-- HORA: timestamp (nullable = true)
 |-- G1_DWATT: double (nullable = true)
 |-- G2_DWATT: double (nullable = true)
 |-- S1_DWATT: double (nullable = true)
 |-- G1_FQG: double (nullable = true)
 |-- G2_FQG: double (nullable = true)
 |-- G1_TTXM: double (nullable = true)
 |-- 11TI1870: double (nullable = true)
 |-- 00TI8002: double (nullable = true)
 |-- G2_TTXM: double (nullable = true)
 |-- 12TI1870: double (nullable = true)
 |-- 10TI6591A: double (nullable = true)
 |-- 10TI6595A: double (nullable = true)
 |-- 10TI3005: double (nullable = true)
 |-- 10TI6591B: double (nullable = true)
 |-- 10TI6595B: double (nullable = true)
 |-- 10JI8128: double (nullable = true)
 |-- 10JI8129: double (nullable = true)
 |-- G1_CTIM: double (nullable = true)
 |-- G1_CPR: double (nullable = true)
 |-- G1_CTD: double (nullable = true)
 |-- G2_CTIM: double (nullable = true)
 |-- G2_CPR: double (nullable = true)
 |-- G2_CTD: double (nullable = 

In [0]:
%python
# Show the first five raw rows as a visual sanity check.
raw_preview = df_raw.limit(5)
raw_preview.display()

HORA,G1_DWATT,G2_DWATT,S1_DWATT,G1_FQG,G2_FQG,G1_TTXM,11TI1870,00TI8002,G2_TTXM,12TI1870,10TI6591A,10TI6595A,10TI3005,10TI6591B,10TI6595B,10JI8128,10JI8129,G1_CTIM,G1_CPR,G1_CTD,G2_CTIM,G2_CPR,G2_CTD
2025-01-01T00:00:00Z,152.5106,152.4505,165.8148,19.3625,19.3346,1141.8014,187.1679,67.3071,1144.6307,192.0112,58.2015,72.8271,83.6596,58.1934,73.3537,7.6469,3.7811,51.9382,13.958,682.0012817,52.92628098,13.8769865,675.1011963
2025-01-01T00:05:00Z,122.0084,121.4062,162.6954,16.6269,16.5747,1172.174,185.3114,67.3071,1171.2681,190.2126,58.278,72.8189,84.1075,58.2707,73.3498,7.6582,3.7072,51.6043,12.0115,651.3428955,52.71135712,11.96950531,647.7202148
2025-01-01T00:10:00Z,131.4392,131.3696,157.4397,17.4702,17.4654,1163.5741,182.2714,67.3069,1161.6884,186.721,58.216,72.2225,83.2513,58.2136,72.6957,7.6239,3.747,51.6124,12.6012,661.0903931,53.17776108,12.59300041,656.9326782
2025-01-01T00:15:00Z,130.377,130.287,156.4772,17.3814,17.3602,1164.3119,181.6497,67.3132,1163.6444,186.5496,58.1203,71.9883,83.0042,58.1302,72.3812,7.589,3.7684,51.3061,12.5268,659.5918579,53.17028809,12.51332188,656.1364136
2025-01-01T00:20:00Z,130.2997,130.2322,157.4181,17.3453,17.3158,1164.4963,182.7185,67.3071,1163.7173,187.4857,58.2028,72.0428,82.6763,58.178,72.5569,7.5639,3.7487,51.1168,12.4957,658.9372559,53.41108322,12.4913187,655.9759521


---
## BRONZE — Preparación y escritura Delta

In [0]:
%python
# Rename columns whose names start with a digit: original name -> new name.
# NOTE(review): only these three tags are renamed; other digit-leading
# columns (e.g. 10TI6591A, 10JI8128) keep their original names — confirm
# that this is intentional.
DIGIT_LEADING_RENAMES = {
    "11TI1870": "TI1870_11",
    "00TI8002": "TI8002_amb",
    "12TI1870": "TI1870_12",
}

df_bronze = df_raw
for original_name, bronze_name in DIGIT_LEADING_RENAMES.items():
    df_bronze = df_bronze.withColumnRenamed(original_name, bronze_name)

print("Columnas renombradas")

Columnas renombradas


In [0]:
%python
# Add traceability columns without altering the measured values.
#
# The time column is normalised with a plain cast instead of
# to_timestamp(..., "yyyy-MM-dd HH:mm:ss.SSS"). The raw read already infers
# HORA as a timestamp, so the fixed pattern was never exercised; had the
# column arrived as a string, any value without milliseconds would have
# failed to match it (null, or an error under ANSI mode). The cast leaves
# timestamp input untouched and also accepts the usual ISO-like string forms.
#
# The column is resolved under either name so that the cell can be re-run
# after HORA has already been renamed to "timestamp".
time_column = "HORA" if "HORA" in df_bronze.columns else "timestamp"

df_bronze = (
    df_bronze
    .withColumn(time_column,      F.col(time_column).cast("timestamp"))
    .withColumnRenamed(time_column, "timestamp")
    .withColumn("source_file",    F.lit(SOURCE_FILE))        # originating file name
    .withColumn("ingestion_date", F.current_timestamp())     # load time of this run
)

print(f"Registros bronze: {df_bronze.count():,}")

# Covered time range plus the number of rows left without a timestamp.
df_bronze.select(
    F.min("timestamp").alias("fecha_inicio"),
    F.max("timestamp").alias("fecha_fin"),
    F.sum(F.col("timestamp").isNull().cast("int")).alias("timestamps_nulos")
).display()

Registros bronze: 96,327


fecha_inicio,fecha_fin,timestamps_nulos
2025-01-01T00:00:00Z,2025-12-01T12:00:00Z,0


In [0]:
# LIMITATION: Unity Catalog requires registered External Locations for MANAGED LOCATION
# TEMPORARY WORKAROUND: create the schemas without MANAGED LOCATION (they use the catalog's location)
# PERMANENT FIX: an admin must create External Locations in the Unity Catalog UI for:
#   - abfss://metastore@adlssmartdatajpc.dfs.core.windows.net/catalog_termoplanta/
#   - abfss://bronze@adlssmartdatajpc.dfs.core.windows.net/
#   - abfss://silver@adlssmartdatajpc.dfs.core.windows.net/
#   - abfss://golden@adlssmartdatajpc.dfs.core.windows.net/

STORAGE = "adlssmartdatajpc"
CATALOG = "adbsmartdatajpc"  # use the existing catalog

# Create one schema per layer; IF NOT EXISTS keeps the statement safe to repeat.
for layer_name in ("bronze", "silver", "golden"):
    spark.sql(f"CREATE SCHEMA IF NOT EXISTS {CATALOG}.{layer_name}")

# Report the outcome and remind the reader where the data will be stored.
for message in (
    f"✅ Schemas creados en catalog: {CATALOG}",
    "⚠️  NOTA: Los datos se guardarán en la ubicación por defecto del catalog",
    "   Para usar contenedores específicos (bronze/silver/golden), contactar admin",
):
    print(message)

✅ Schemas creados en catalog: adbsmartdatajpc
⚠️  NOTA: Los datos se guardarán en la ubicación por defecto del catalog
   Para usar contenedores específicos (bronze/silver/golden), contactar admin


In [0]:
CATALOG = "adbsmartdatajpc"  # use the existing catalog

# Fully qualified name of the destination table.
bronze_table = f"{CATALOG}.bronze.historian_signals"

# Replace the Delta table on every run, letting the stored schema be overwritten too.
bronze_writer = (
    df_bronze.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_writer.saveAsTable(bronze_table)

print(f"Tabla bronze escrita: {CATALOG}.bronze.historian_signals")

Tabla bronze escrita: adbsmartdatajpc.bronze.historian_signals


In [0]:
CATALOG = "adbsmartdatajpc"  # use the existing catalog

# Read the table back to confirm the write: row count plus a five-row sample.
bronze_table_name = f"{CATALOG}.bronze.historian_signals"
df_check = spark.table(bronze_table_name)

print(f"Registros en tabla bronze: {df_check.count():,}")
bronze_sample = df_check.limit(5)
bronze_sample.display()

Registros en tabla bronze: 96,327


timestamp,G1_DWATT,G2_DWATT,S1_DWATT,G1_FQG,G2_FQG,G1_TTXM,TI1870_11,TI8002_amb,G2_TTXM,TI1870_12,10TI6591A,10TI6595A,10TI3005,10TI6591B,10TI6595B,10JI8128,10JI8129,G1_CTIM,G1_CPR,G1_CTD,G2_CTIM,G2_CPR,G2_CTD,source_file,ingestion_date
2025-01-01T00:00:00Z,152.5106,152.4505,165.8148,19.3625,19.3346,1141.8014,187.1679,67.3071,1144.6307,192.0112,58.2015,72.8271,83.6596,58.1934,73.3537,7.6469,3.7811,51.9382,13.958,682.0012817,52.92628098,13.8769865,675.1011963,BD_EXAQUAUNTUM.csv,2026-02-23T04:06:20.83695Z
2025-01-01T00:05:00Z,122.0084,121.4062,162.6954,16.6269,16.5747,1172.174,185.3114,67.3071,1171.2681,190.2126,58.278,72.8189,84.1075,58.2707,73.3498,7.6582,3.7072,51.6043,12.0115,651.3428955,52.71135712,11.96950531,647.7202148,BD_EXAQUAUNTUM.csv,2026-02-23T04:06:20.83695Z
2025-01-01T00:10:00Z,131.4392,131.3696,157.4397,17.4702,17.4654,1163.5741,182.2714,67.3069,1161.6884,186.721,58.216,72.2225,83.2513,58.2136,72.6957,7.6239,3.747,51.6124,12.6012,661.0903931,53.17776108,12.59300041,656.9326782,BD_EXAQUAUNTUM.csv,2026-02-23T04:06:20.83695Z
2025-01-01T00:15:00Z,130.377,130.287,156.4772,17.3814,17.3602,1164.3119,181.6497,67.3132,1163.6444,186.5496,58.1203,71.9883,83.0042,58.1302,72.3812,7.589,3.7684,51.3061,12.5268,659.5918579,53.17028809,12.51332188,656.1364136,BD_EXAQUAUNTUM.csv,2026-02-23T04:06:20.83695Z
2025-01-01T00:20:00Z,130.2997,130.2322,157.4181,17.3453,17.3158,1164.4963,182.7185,67.3071,1163.7173,187.4857,58.2028,72.0428,82.6763,58.178,72.5569,7.5639,3.7487,51.1168,12.4957,658.9372559,53.41108322,12.4913187,655.9759521,BD_EXAQUAUNTUM.csv,2026-02-23T04:06:20.83695Z
