# Parámetros

In [0]:
# catalog = "sales"
# silver_schema = "sales_silver"
# silver_table = "sales_curated"
# gold_schema = "sales_gold"
# dim_product_table = "dim_product"
# dim_location_table = "dim_location"
# dim_store_table = "dim_store"
# dim_seller_table =  "dim_seller"

In [0]:
# Read the job parameters from the notebook widgets.
try:
    # Alias the widget getter so every parameter is fetched the same way.
    read_widget = dbutils.widgets.get

    catalog = read_widget("catalog")
    silver_schema = read_widget("silver_schema")
    silver_table = read_widget("silver_table")
    gold_schema = read_widget("gold_schema")
    dim_product_table = read_widget("dim_product_table")
    dim_location_table = read_widget("dim_location_table")
    dim_store_table = read_widget("dim_store_table")
    dim_seller_table = read_widget("dim_seller_table")

    print("Parámetros cargados exitosamente.")
except Exception as e:
    # Any failure while reading a widget is fatal: report it and stop.
    print(
        "Error fatal: No se pudieron obtener los parámetros",
        f"Detalle del error: {e}",
        sep="\n",
    )
    raise Exception("Error al obtener parámetros")


# Crea tabla Dim_Product si no existe

In [0]:
try:
    # DDL for the product dimension. IF NOT EXISTS lets the cell be re-run
    # without failing when the table is already there.
    create_query = f"""
    CREATE TABLE IF NOT EXISTS {catalog}.{gold_schema}.{dim_product_table} (
        idProduct BIGINT GENERATED ALWAYS AS IDENTITY,
        itemno INTEGER,
        itemName STRING,
        pack INTEGER,
        bottle_volume_ml INT,
        category STRING,
        StoreDay TIMESTAMP
    )
    USING DELTA
    """
    spark.sql(create_query)
    print(f"Tabla {dim_product_table} asegurada")
except Exception as e:
    # Report the failure with its detail, then abort with a DDL-specific error.
    print(
        f"Error fatal: Fallo durante la creación de la tabla {dim_product_table}",
        f"Detalle del error: {e}",
        sep="\n",
    )
    raise Exception(f"Error en DDL para {dim_product_table}. Verifique la sintaxis")

# Carga de Dim_Product

In [0]:
try:
  # Upsert Dim_Product from the most recent silver row of each itemno.
  #
  # * The ROW_NUMBER ordering carries tie-breakers after `date` so that, when
  #   several rows share the latest date, the same row wins on every run
  #   (otherwise the dimension could flip between values run after run).
  # * Change detection uses NOT (a <=> b), the null-safe inequality. With
  #   `a <> b` the predicate evaluates to NULL whenever either side is NULL,
  #   so a change from/to NULL would never be written.
  #
  # NOTE(review): rows whose itemno is NULL never satisfy the ON clause and
  # would be inserted again on every run - confirm the silver table
  # guarantees a non-null itemno.
  merge_query = f"""
  MERGE INTO {catalog}.{gold_schema}.{dim_product_table} AS target
  USING (
    SELECT * FROM (
      SELECT
        itemno,
        im_desc,
        pack,
        bottle_volume_ml,
        category_name,
        ROW_NUMBER() OVER (
          PARTITION BY itemno
          ORDER BY date DESC, im_desc, pack, bottle_volume_ml, category_name
        ) AS rn
      FROM {catalog}.{silver_schema}.{silver_table})
    WHERE rn = 1
  ) AS source
  ON target.itemno = source.itemno

  -- Un producto puede cambiar de nombre, pack, categoria, bottle_volume_ml
  WHEN MATCHED AND (
    NOT (target.itemName <=> source.im_desc)
    OR NOT (target.pack <=> source.pack)
    OR NOT (target.bottle_volume_ml <=> source.bottle_volume_ml)
    OR NOT (target.category <=> source.category_name)
  ) THEN
    UPDATE SET
      itemName = source.im_desc,
      pack = source.pack,
      bottle_volume_ml = source.bottle_volume_ml,
      category = source.category_name,
      StoreDay = current_timestamp()
  -- Cuando el producto no se encuentre en la tabla dimensional se lo agrega
  WHEN NOT MATCHED THEN
    INSERT (itemno, itemName, pack, bottle_volume_ml, category, StoreDay)
    VALUES (source.itemno, source.im_desc, source.pack, source.bottle_volume_ml, source.category_name, current_timestamp())
  """
  spark.sql(merge_query)
  print(f"Merge en {dim_product_table} exitoso")
except Exception as e:
  print(f"Error fatal: Fallo durante el MERGE en {dim_product_table}")
  print(f"Detalle del error: {e}")
  # Chain the original error so the traceback shows it as the direct cause.
  raise Exception(f"Error en DML para {dim_product_table}. Verifique la subconsulta 'USING' o la lógica del MERGE.") from e


# Crea Dim_Seller si no existe

In [0]:
try:
    # DDL for the seller dimension. IF NOT EXISTS lets the cell be re-run
    # without failing when the table is already there.
    create_query = f"""
    CREATE TABLE IF NOT EXISTS {catalog}.{gold_schema}.{dim_seller_table} (
        idSeller BIGINT GENERATED ALWAYS AS IDENTITY,
        vendor_no INTEGER,
        vendor_name STRING,
        StoreDay TIMESTAMP
    )
    USING DELTA
    """
    spark.sql(create_query)
    print(f"Tabla {dim_seller_table} asegurada")
except Exception as e:
    # Report the failure with its detail, then abort with a DDL-specific error.
    print(
        f"Error fatal: Fallo durante la creación de la tabla {dim_seller_table}",
        f"Detalle del error: {e}",
        sep="\n",
    )
    raise Exception(f"Error en DDL para {dim_seller_table}. Verifique la sintaxis")

# Carga de Dim_Seller

In [0]:
try:
  # Upsert Dim_Seller from the most recent silver row of each vendor_no.
  #
  # * `vendor_name` is added as a tie-breaker after `date` so that several
  #   rows sharing the latest date always produce the same winner.
  # * Change detection uses NOT (a <=> b), the null-safe inequality; with
  #   `a <> b` a change from/to NULL evaluates to NULL and is never applied.
  #
  # NOTE(review): rows whose vendor_no is NULL never satisfy the ON clause
  # and would be inserted again on every run - confirm the silver table
  # guarantees a non-null vendor_no.
  merge_query = f"""
  MERGE INTO {catalog}.{gold_schema}.{dim_seller_table} AS target
  USING (
    SELECT * FROM (
      SELECT
        vendor_no,
        vendor_name,
        ROW_NUMBER() OVER (
            PARTITION BY vendor_no
            ORDER BY date DESC, vendor_name
        ) AS rn
      FROM {catalog}.{silver_schema}.{silver_table})
    WHERE rn = 1
  ) AS source
  ON target.vendor_no = source.vendor_no
  -- Se considera que el vendedor puede cambiar de nombre
  WHEN MATCHED AND NOT (target.vendor_name <=> source.vendor_name) THEN
    UPDATE SET
      vendor_name = source.vendor_name,
      StoreDay = current_timestamp()
  -- Cuando no se encuentra el identificador de vendedor es agregado a la dimension
  WHEN NOT MATCHED THEN
    INSERT (vendor_no, vendor_name, StoreDay)
    VALUES (source.vendor_no, source.vendor_name, current_timestamp())
  """
  spark.sql(merge_query)
  print(f"Merge en {dim_seller_table} exitoso")
except Exception as e:
  print(f"Error fatal: Fallo durante el MERGE en {dim_seller_table}")
  print(f"Detalle del error: {e}")
  # Chain the original error so the traceback shows it as the direct cause.
  raise Exception(f"Error en DML para {dim_seller_table}. Verifique la subconsulta 'USING' o la lógica del MERGE.") from e

# Crea Dim_Location si no existe

In [0]:
try:
    # DDL for the location dimension. IF NOT EXISTS lets the cell be re-run
    # without failing when the table is already there.
    create_query = f"""
    CREATE TABLE IF NOT EXISTS {catalog}.{gold_schema}.{dim_location_table} (
        idLocation BIGINT GENERATED ALWAYS AS IDENTITY,
        county STRING,
        city STRING,
        zipcode STRING,
        address STRING,
        StoreDay TIMESTAMP
    )
    USING DELTA
    """
    spark.sql(create_query)
    print(f"Tabla {dim_location_table} asegurada")
except Exception as e:
    # Report the failure with its detail, then abort with a DDL-specific error.
    print(
        f"Error fatal: Fallo durante la creación de la tabla {dim_location_table}",
        f"Detalle del error: {e}",
        sep="\n",
    )
    raise Exception(f"Error en DDL para {dim_location_table}. Verifique la sintaxis")

# Carga Dim_Location

In [0]:
try:
  # Insert every distinct (county, city, zipcode, address) combination of the
  # silver table that is not yet present in Dim_Location.
  #
  # The match uses the null-safe equality `<=>`: with plain `=`, a
  # combination containing a NULL component never matches its own row in the
  # target, so it would be inserted again on every run and the dimension
  # would accumulate duplicates.
  merge_query = f"""
  MERGE INTO {catalog}.{gold_schema}.{dim_location_table} AS target
  USING (
    SELECT DISTINCT
      county,
      city,
      zipcode,
      address
    FROM {catalog}.{silver_schema}.{silver_table}
  ) AS source
  ON target.county <=> source.county
    AND target.city <=> source.city
    AND target.zipcode <=> source.zipcode
    AND target.address <=> source.address
  -- Los registros de la dimension Location no pueden cambiar
  WHEN NOT MATCHED THEN
    INSERT (county, city, zipcode, address, StoreDay)
    VALUES (source.county, source.city, source.zipcode, source.address, current_timestamp())
  """
  spark.sql(merge_query)
  print(f"Merge en {dim_location_table} exitoso")
except Exception as e:
  print(f"Error fatal: Fallo durante el MERGE en {dim_location_table}")
  print(f"Detalle del error: {e}")
  # Chain the original error so the traceback shows it as the direct cause.
  raise Exception(f"Error en DML para {dim_location_table}. Verifique la subconsulta 'USING' o la lógica del MERGE.") from e

# Crea Dim_Store si no existe

In [0]:
try:
    # DDL for the store dimension. IF NOT EXISTS lets the cell be re-run
    # without failing when the table is already there.
    create_query = f"""
    CREATE TABLE IF NOT EXISTS {catalog}.{gold_schema}.{dim_store_table} (
        idStore BIGINT GENERATED ALWAYS AS IDENTITY,
        idLocation BIGINT,
        store_number INTEGER,
        store_name STRING,
        StoreDay TIMESTAMP
    )
    USING DELTA
    """
    spark.sql(create_query)
    print(f"Tabla {dim_store_table} asegurada")
except Exception as e:
    # Report the failure with its detail, then abort with a DDL-specific error.
    print(
        f"Error fatal: Fallo durante la creación de la tabla {dim_store_table}",
        f"Detalle del error: {e}",
        sep="\n",
    )
    raise Exception(f"Error en DDL para {dim_store_table}. Verifique la sintaxis")

# Carga Dim_Store

In [0]:
try:
  # Upsert Dim_Store from the most recent silver row of each store_number,
  # resolving idLocation through a join with Dim_Location.
  #
  # * The join to Dim_Location uses the null-safe equality `<=>`; with plain
  #   `=` a store whose location has any NULL component is dropped by the
  #   INNER JOIN and never reaches the dimension.
  # * store_name and idLocation are added as tie-breakers after `date` so
  #   that several rows sharing the latest date (or several matching location
  #   rows) always produce the same winner.
  # * Change detection uses NOT (a <=> b), the null-safe inequality; with
  #   `a <> b` a change from/to NULL evaluates to NULL and is never applied.
  #
  # NOTE(review): rows whose store_number is NULL never satisfy the ON clause
  # and would be inserted again on every run - confirm the silver table
  # guarantees a non-null store_number.
  merge_query = f"""
  MERGE INTO {catalog}.{gold_schema}.{dim_store_table} AS target
  USING (
    SELECT * FROM (
      SELECT
        location.idLocation,
        store.store_name,
        store.store_number,
        ROW_NUMBER() OVER (
          PARTITION BY store.store_number
          ORDER BY store.date DESC, store.store_name, location.idLocation
        ) AS rn
      FROM {catalog}.{silver_schema}.{silver_table} AS store
      -- Se rescata el id de dim_location
      INNER JOIN {catalog}.{gold_schema}.{dim_location_table} AS location
        ON store.county <=> location.county
        AND store.city <=> location.city
        AND store.zipcode <=> location.zipcode
        AND store.address <=> location.address)
    WHERE rn = 1
    
  ) AS source
  ON target.store_number = source.store_number

  -- Se considera que el nombre y la ubicacion de la tienda puede cambiar
  WHEN MATCHED AND (
    NOT (target.store_name <=> source.store_name)
    OR NOT (target.idLocation <=> source.idLocation)
  ) THEN
    UPDATE SET
      store_name = source.store_name,
      idLocation = source.idLocation,
      StoreDay = current_timestamp()
  -- Cuando no haya coincidencia por clave natural se inserta el registro
  WHEN NOT MATCHED THEN
    INSERT (idLocation, store_number, store_name, StoreDay)
    VALUES (source.idLocation, source.store_number, source.store_name, current_timestamp())
  """
  spark.sql(merge_query)
  print(f"Merge en {dim_store_table} exitoso")
except Exception as e:
  print(f"Error fatal: Fallo durante el MERGE en {dim_store_table}")
  print(f"Detalle del error: {e}")
  # Chain the original error so the traceback shows it as the direct cause.
  raise Exception(f"Error en DML para {dim_store_table}. Verifique la subconsulta 'USING' (incluyendo el JOIN con {dim_location_table}) o la lógica del MERGE.") from e

