In [0]:
# ============================================
# SILVER INGESTA - NORTHWIND
# ============================================

import re
import uuid
from datetime import datetime, timezone

from pyspark.sql.functions import col, lit, to_date

# --------------------------------------------
# CONFIGURATION
# --------------------------------------------
# Catalog name plus the schema names of the source (Bronze) and
# target (Silver) layers.
catalog = "northwind"
bronze_schema, silver_schema = "bronze", "silver"

# Select the catalog and make the Silver schema the current one for
# this session.
spark.sql("USE CATALOG {}".format(catalog))
spark.sql("USE SCHEMA {}".format(silver_schema))

# --------------------------------------------
# BRONZE TABLES TO INGEST
# --------------------------------------------
# Processed in the order listed.
tables = [
    "categories", "customers", "employee_territories", "employees",
    "order_details", "orders", "products", "regions",
    "shippers", "suppliers", "territories",
]

# --------------------------------------------
# FUNCIONES DE LIMPIEZA
# --------------------------------------------

def camel_to_snake(name):
    """Convert a camelCase or PascalCase identifier to snake_case.

    An underscore is inserted before each capitalised word and between a
    lowercase letter or digit and the uppercase letter that follows it; the
    result is then lowercased. A boundary that is already marked with an
    underscore is left alone, so ``Order_Date`` becomes ``order_date``
    rather than ``order__date``.

    Args:
        name: Identifier to convert.

    Returns:
        The lowercased snake_case form of ``name``.
    """
    # Split before a capitalised word ("HTTPResponse" -> "HTTP_Response").
    # The preceding character must not be an underscore, otherwise an
    # existing separator would be doubled.
    partial = re.sub(r'([^_])([A-Z][a-z]+)', r'\1_\2', name)
    # Split where a lowercase letter or digit meets an uppercase letter
    # ("orderID" -> "order_ID").
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', partial).lower()

def limpiar_columnas(df):
    """Rename every column of ``df`` to snake_case.

    Each column name is stripped of surrounding whitespace, has its spaces
    replaced by underscores, and is then passed through ``camel_to_snake``.

    Args:
        df: DataFrame whose columns should be renamed.

    Returns:
        A DataFrame with the same columns in the same order, carrying the
        normalised names.
    """
    normalized = [
        camel_to_snake(original.strip().replace(" ", "_"))
        for original in df.columns
    ]
    # toDF renames all columns positionally in a single step instead of
    # chaining one withColumnRenamed call per column.
    return df.toDF(*normalized)

def convertir_fechas(df, cols):
    """Convert the requested columns of ``df`` with ``to_date``.

    Args:
        df: DataFrame to transform.
        cols: Names of the columns to convert. Names that are not
            columns of ``df`` are skipped.

    Returns:
        The DataFrame with each matching column replaced by its
        ``to_date`` conversion; ``df`` itself when nothing matches.
    """
    # Replacing a column under its own name keeps the set of column names
    # unchanged, so the targets can be resolved up front.
    targets = [name for name in cols if name in df.columns]
    for name in targets:
        df = df.withColumn(name, to_date(col(name)))
    return df

# --------------------------------------------
# INGEST BRONZE TABLES INTO SILVER
# --------------------------------------------

# Audit values that are identical for every table.
source_system = 'northwind_csv'
ingest_user = 'databricks_pipeline'

# Tables whose ingestion raised, so the final message can report them
# instead of announcing an unconditional success.
failed_tables = []

for table in tables:
    try:
        print(f"📥 Procesando Bronze Table: {table}")

        # 1. Read the table from Bronze.
        df = spark.table(f"{bronze_schema}.{table}")

        # 2. Normalise column names to snake_case.
        df = limpiar_columnas(df)

        # 3. Convert the order date columns to DATE where they exist.
        df = convertir_fechas(df, ["order_date", "required_date", "shipped_date"])

        # 4. Add audit columns. The timestamp is read in UTC from a
        #    timezone-aware clock (datetime.utcnow() is deprecated since
        #    Python 3.12) and is still stored as a formatted string.
        ingest_date = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
        load_id = str(uuid.uuid4())  # a fresh id for each table load

        df = (
            df.withColumn("ingest_date", lit(ingest_date))
            .withColumn("source_file", lit(f"{table}.csv"))
            .withColumn("source_system", lit(source_system))
            .withColumn("ingest_user", lit(ingest_user))
            .withColumn("load_id", lit(load_id))
        )

        # 5. Write as a Delta table in Silver, named with the t_ prefix.
        delta_table = f"{silver_schema}.t_{table}"
        df.write.mode("overwrite").format("delta").saveAsTable(delta_table)

        print(f"✅ Silver Table creada: {delta_table}")

    except Exception as e:
        # Best effort: report the failure, remember the table and carry on
        # with the remaining ones.
        print(f"⚠️ Error procesando {table}: {e}")
        failed_tables.append(table)

if failed_tables:
    print(f"⚠️ Ingesta Silver finalizada con errores en: {', '.join(failed_tables)}")
else:
    print("🚀 Ingesta Silver completa ✅")
