In [0]:
#Final Project - Hydro Generation Raw
#Kevin Carrion

# Drop every widget left over from earlier runs so that only the four
# parameters defined below exist on the notebook.
# NOTE(review): removing the widgets also discards any values typed into
# them, so the reads further down fall back to the defaults — confirm that
# this reset is intended.
dbutils.widgets.removeAll()

# COMMAND ----------

# Databricks notebook source
# =========================
# Widgets
# =========================
# The widgets are created in their own cell (see the separator above):
# Databricks documents that a widget cannot be created in the same cell
# that removes widgets.
dbutils.widgets.text("container", "raw")  # ADLS container that holds the raw file
dbutils.widgets.text("catalogo", "catalog_final_project")  # target catalog
dbutils.widgets.text("esquema", "bronze")  # target schema
dbutils.widgets.text("datalake", "adlssdemoazure1201")  # storage account name

# COMMAND ----------

from pyspark.sql import functions as F
from pyspark.sql.types import *

# Resolve the four notebook parameters from their widgets.
container, catalogo, esquema, datalake = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("container", "catalogo", "esquema", "datalake")
)

# Adjust here if the real file name differs.
ruta = (
    f"abfss://{container}@{datalake}.dfs.core.windows.net"
    "/hydrology_data_light.csv"
)

# Fully qualified name (catalog.schema.table) of the destination table.
tabla_destino = ".".join((catalogo, esquema, "hydrology_raw"))

# Echo the resolved source path and target table.
print(f"Ruta RAW: {ruta}", f"Tabla Bronze: {tabla_destino}", sep="\n")

# COMMAND ----------

# =========================
# Schema
# =========================
# Explicit schema for the hydrology CSV. Every column is nullable; the
# identifiers and the timestamp are read as strings, the measurements as
# doubles.
hyd_schema = StructType([
    StructField(column_name, column_type(), True)
    for column_name, column_type in (
        ("river_basin", StringType),
        ("plant_id", StringType),
        ("datetime", StringType),  # left as text here; parsed in SILVER
        ("reservoir_level_m", DoubleType),
        ("inflow_m3s", DoubleType),
        ("rainfall_mm", DoubleType),
        ("temperature_c", DoubleType),
    )
])

# Read the raw CSV with the explicit schema (no inference) and append two
# audit columns: the ingestion timestamp and the path of the source file.
df = (
    spark.read.format("csv")
    .option("header", "true")  # the first line of the file holds column names
    .schema(hyd_schema)
    .load(ruta)
    # A single select directly on the loaded DataFrame keeps the hidden
    # `_metadata` column resolvable and preserves the original column order
    # (data columns, _ingestion_ts, _source_file).
    .select(
        "*",
        F.current_timestamp().alias("_ingestion_ts"),
        # File-source metadata column. It replaces F.input_file_name(), which
        # Databricks deprecates and does not support on Unity Catalog
        # standard (shared) access mode compute.
        F.col("_metadata.file_path").alias("_source_file"),
    )
)


# COMMAND ----------

# =========================
# Write Bronze (Delta Table)
# =========================
# Replace the contents of the destination table with the DataFrame read
# above, stored in Delta format.
df.write.saveAsTable(tabla_destino, format="delta", mode="overwrite")

# Confirmation message naming the table that was written.
print("OK:", tabla_destino)