In [0]:
#Final Project - Hydro Generation Raw
#Kevin Carrion

# Drop widgets
# =========================
# Remove every widget currently defined on the notebook.
dbutils.widgets.removeAll()

# Databricks notebook source
# Databricks documents that widgets cannot be created in the same cell that
# calls removeAll(); the separator below puts the definitions in their own cell.
# COMMAND ----------

# =========================
# Widgets
# =========================
# Text widgets with their default values; they are read back later with
# dbutils.widgets.get().
dbutils.widgets.text("container", "raw")
dbutils.widgets.text("catalogo", "catalog_final_project")
dbutils.widgets.text("esquema", "bronze")
dbutils.widgets.text("datalake", "adlssdemoazure1201")

# COMMAND ----------

from pyspark.sql import functions as F
from pyspark.sql.types import *

# Read the four widget values in one pass (same order as they were declared).
container, catalogo, esquema, datalake = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("container", "catalogo", "esquema", "datalake")
)

# Source CSV location in ADLS (DFS endpoint)
ruta = f"abfss://{container}@{datalake}.dfs.core.windows.net/hydro_generation_light.csv"

# Fully qualified destination table in Bronze
tabla_destino = f"{catalogo}.{esquema}.hydro_generation_raw"

# Echo the resolved source path and target table for the run log.
print("Ruta RAW:", ruta)
print("Tabla Bronze:", tabla_destino)

# COMMAND ----------

# =========================
# Schema
# =========================
# Explicit schema applied to the CSV instead of inferring it. All fields are
# nullable; `datetime` is read as a plain string (no parsing in this step).
gen_schema = StructType([
    StructField("plant_id", StringType(), True),
    StructField("plant_name", StringType(), True),
    StructField("datetime", StringType(), True),
    StructField("installed_capacity_mw", DoubleType(), True),
    StructField("actual_generation_mw", DoubleType(), True),
    StructField("water_flow_m3s", DoubleType(), True),
    StructField("turbine_efficiency", DoubleType(), True),
    StructField("outage_flag", IntegerType(), True),
])

# Load the CSV (first row is the header) and append two audit columns:
#   _ingestion_ts - timestamp at which the load runs
#   _source_file  - path of the file each row was read from
df = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(gen_schema)
    .load(ruta)
    .withColumn("_ingestion_ts", F.current_timestamp())
    # input_file_name() is deprecated on recent Databricks runtimes and is not
    # supported on Unity Catalog shared-access compute; the file-source
    # metadata column is the documented replacement.
    .withColumn("_source_file", F.col("_metadata.file_path"))
)


# COMMAND ----------

# =========================
# Write Bronze (Delta Table)
# =========================
# Replace the contents of the destination table with the freshly loaded
# DataFrame, stored as Delta.
df.write.saveAsTable(tabla_destino, format="delta", mode="overwrite")

# Confirmation message once the write has returned.
print(f"OK: {tabla_destino}")