In [0]:
%pip install python-dotenv

In [0]:
# ============================================
# BRONZE INGESTA - NORTHWIND (CSV + overwriteSchema)
# ============================================

import os
from datetime import datetime, timezone

import pandas as pd
from dotenv import load_dotenv

# --------------------------------------------
# CONFIGURATION
# --------------------------------------------
# Catalog and schema that the Spark session is pointed at below.
catalog = "northwind"
schema = "bronze"

# Load environment variables from the workspace .env file
load_dotenv("/Workspace/Users/renzo_hc@outlook.com/data_engineer/.env")

spark.sql(f"USE CATALOG {catalog}")
spark.sql(f"USE SCHEMA {schema}")

# Prefix to which each "<table>.csv" file name is appended when downloading.
base_url = os.getenv("BASE_URL_NORTHWIND")
if not base_url:
    # Fail fast: with a missing/empty value every download would target
    # "None<table>.csv" and surface only as a misleading per-table error.
    raise ValueError(
        "La variable de entorno BASE_URL_NORTHWIND no está definida; "
        "revisa el archivo .env"
    )

# --------------------------------------------
# CSV FILES (CORRECT NAMES)
# --------------------------------------------
# Base names of the source files, without the ".csv" extension.
tables = [
    "categories", "customers", "employee_territories", "employees",
    "order_details", "orders", "products", "regions",
    "shippers", "suppliers", "territories",
]

# --------------------------------------------
# CSV FILE INGESTION
# --------------------------------------------
# Each table is attempted independently so that one bad file does not block
# the others. Failures are collected and reported only after every table has
# been tried, so the run cannot claim success when something went wrong.
failed_tables = []

for table in tables:
    try:
        print(f"📥 Descargando CSV: {table}")
        url = f"{base_url}{table}.csv"

        # Download the CSV straight into a pandas DataFrame
        df_pd = pd.read_csv(url)

        # Metadata column: UTC ingestion time as a formatted string.
        # datetime.utcnow() is deprecated since Python 3.12; the
        # timezone-aware call below formats to the same value.
        fecha_ingesta = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
        df_pd["fecha_ingesta"] = fecha_ingesta

        # Convert to a Spark DataFrame
        df_spark = spark.createDataFrame(df_pd)

        # Save as a Delta table in Bronze; overwriteSchema lets a changed
        # CSV layout replace the existing table schema on overwrite.
        delta_table = f"{schema}.{table}"
        (
            df_spark.write
            .option("overwriteSchema", "true")
            .mode("overwrite")
            .format("delta")
            .saveAsTable(delta_table)
        )

        print(f"✅ Bronze Table creada: {delta_table}")

    except Exception as e:
        # Best-effort per table: report, remember the failure, keep going.
        print(f"⚠️ Error procesando {table}: {e}")
        failed_tables.append(table)

if failed_tables:
    # Surface the failure to the notebook/job instead of printing the
    # success banner over a partially loaded Bronze layer.
    raise RuntimeError(
        f"Ingesta Bronze incompleta; tablas con error: {', '.join(failed_tables)}"
    )

print("🚀 Ingesta Bronze desde CSV completa ✅")
