# Parámetros

In [0]:
# NOTE(review): presumably sample values for running the notebook by hand.
# They are commented out; the values actually used are read from widgets of
# the same names in the next cell. Confirm before deleting.
# catalog = "sales"
# silver_schema = "sales_silver"
# silver_table = "sales_curated"
# gold_schema = "sales_gold"
# gold_fact_table = "fact_sales"
# dim_time_table = "dim_time"
# dim_product_table = "dim_product"
# dim_seller_table = "dim_seller"
# dim_store_table = "dim_store"

In [0]:
# Read the job parameters from the notebook widgets.
# Any failure (missing widget, blank value) is printed and re-raised as a
# generic Exception so the run stops before any SQL is built.
try:
    catalog = dbutils.widgets.get("catalog")
    silver_schema = dbutils.widgets.get("silver_schema")
    silver_table = dbutils.widgets.get("silver_table")
    gold_schema = dbutils.widgets.get("gold_schema")
    gold_fact_table = dbutils.widgets.get("gold_fact_table")
    dim_time_table = dbutils.widgets.get("dim_time_table")
    dim_product_table = dbutils.widgets.get("dim_product_table")
    dim_seller_table = dbutils.widgets.get("dim_seller_table")
    dim_store_table = dbutils.widgets.get("dim_store_table")

    # Fail fast on blank values: every parameter is interpolated into a
    # three-part table name further down, so an empty one would only surface
    # later as a confusing SQL error.
    _blank_params = [
        name
        for name, value in {
            "catalog": catalog,
            "silver_schema": silver_schema,
            "silver_table": silver_table,
            "gold_schema": gold_schema,
            "gold_fact_table": gold_fact_table,
            "dim_time_table": dim_time_table,
            "dim_product_table": dim_product_table,
            "dim_seller_table": dim_seller_table,
            "dim_store_table": dim_store_table,
        }.items()
        if not value or not value.strip()
    ]
    if _blank_params:
        raise ValueError(f"Parámetros vacíos: {', '.join(_blank_params)}")

    print("Parámetros cargados exitosamente.")
except Exception as e:
    print("Error fatal: No se pudieron obtener los parámetros")
    print(f"Detalle del error: {e}")
    # Chain the original error so the root cause is kept as __cause__.
    raise Exception("Error al obtener parámetros") from e

# Creación de la tabla Fact_Sales si no existe

In [0]:
# Ensure the gold fact table exists before loading it.
# The statement is CREATE TABLE IF NOT EXISTS, so re-running the cell against
# an existing table leaves it untouched. idSales is generated by the table
# itself (GENERATED ALWAYS AS IDENTITY), so loaders must not supply it.
try:
  create_query = f"""
  CREATE TABLE IF NOT EXISTS {catalog}.{gold_schema}.{gold_fact_table} (
    idSales BIGINT GENERATED ALWAYS AS IDENTITY,
    InvoiceLineNo STRING,
    idProduct BIGINT,
    idSeller BIGINT,
    idDate BIGINT,
    idStore BIGINT,
    sale_bottles INTEGER,
    sale_dollars DECIMAL(10, 2),
    sale_margin DECIMAL(10, 2),
    total_cost DECIMAL(10, 2),
    sale_liters DECIMAL(10, 3),
    sale_cases INTEGER,
    state_bottle_cost DECIMAL(10, 2),
    state_bottle_retail DECIMAL(10, 2),
    StoreDay TIMESTAMP
  )
  USING DELTA
  """
  spark.sql(create_query)
  print(f"Tabla {gold_fact_table} asegurada")
except Exception as e:
    print(f"Error fatal: Fallo durante la creación de la tabla {gold_fact_table}")
    print(f"Detalle del error: {e}")
    # Chain the original error so the root cause is kept as __cause__.
    raise Exception(f"Error en DDL para {gold_fact_table}. Verifique la sintaxis o los permisos.") from e

# Carga de Fact_Sales

In [0]:
# Load the fact table with a single MERGE keyed on the invoice line number.
#
# Source: the silver table LEFT JOINed to the four gold dimensions to pick up
# their keys. Because the joins are LEFT JOINs, a silver row with no match in
# a dimension is still loaded, with NULL in the corresponding key column.
#
# Matched rows are updated unconditionally, so every run rewrites all matched
# rows and sets StoreDay to the timestamp of that run; unmatched rows are
# inserted. idSales is not listed in the INSERT because the table generates it.
#
# NOTE(review): MERGE needs at most one source row per InvoiceLineNo. SELECT
# DISTINCT only removes fully identical rows, so this presumably relies on each
# dimension having one row per join key and silver having one row per
# invoice_line_no. Confirm against the dimension/silver definitions.
try:
  merge_query = f"""
  MERGE INTO {catalog}.{gold_schema}.{gold_fact_table} AS target
  USING (
      -- Unimos Silver con todas las Dimensiones Gold
      -- para encontrar las llaves primarias correctas (idProduct, idTime, etc.)
      SELECT DISTINCT
          -- Llave Natural de la transacción
          sls.invoice_line_no, 
          
          -- Llaves Foráneas (obtenidas de las dimensiones)
          p.idProduct,
          sel.idSeller,
          t.dateId,
          s.idStore,

          -- Medidas (directo de Silver)
          sls.sale_bottles,
          sls.sale_dollars,
          sls.sale_margin,
          sls.total_cost,
          sls.sale_liters,
          sls.sale_cases,
          sls.state_bottle_cost,
          sls.state_bottle_retail,
          current_timestamp() AS StoreDay

      FROM {catalog}.{silver_schema}.{silver_table} AS sls
      
      -- --- JOINS A DIMENSIONES ---
      
      -- Join con DimProduct
      LEFT JOIN {catalog}.{gold_schema}.{dim_product_table} AS p
          ON sls.itemno = p.itemno
          
      -- Join con DimSeller
      LEFT JOIN {catalog}.{gold_schema}.{dim_seller_table} AS sel
          ON sls.vendor_no = sel.vendor_no

      -- Join con DimStore
      LEFT JOIN {catalog}.{gold_schema}.{dim_store_table} AS s
          ON sls.store_number = s.store_number

      -- Join con DimTime (aquí la llave natural es la 'date')
      LEFT JOIN {catalog}.{gold_schema}.{dim_time_table} AS t
          ON sls.date = t.date

  ) AS source

  -- --- Condición de MERGE ---
  --Se unen target y source usando la Llave Natural de la transacción: InvoiceLineNo
  ON target.InvoiceLineNo = source.invoice_line_no

  -- --- Lógica de Carga ---

  -- CASO MATCHED: La venta ya existe.
  --    Actualizamos todas las medidas y llaves por si hubo correcciones.
  WHEN MATCHED THEN
    UPDATE SET
      target.idProduct = source.idProduct,
      target.idSeller = source.idSeller,
      target.idDate = source.dateId,
      target.idStore = source.idStore,
      target.sale_bottles = source.sale_bottles,
      target.sale_dollars = source.sale_dollars,
      target.sale_margin = source.sale_margin,
      target.total_cost = source.total_cost,
      target.sale_liters = source.sale_liters,
      target.sale_cases = source.sale_cases,
      target.state_bottle_cost = source.state_bottle_cost,
      target.state_bottle_retail = source.state_bottle_retail,
      target.StoreDay = source.StoreDay

  --CASO NOT MATCHED: Es una venta nueva.
  --    La insertamos.
  WHEN NOT MATCHED THEN
    INSERT (
      InvoiceLineNo,
      idProduct,
      idSeller,
      idDate,
      idStore,
      sale_bottles,
      sale_dollars,
      sale_margin,
      total_cost,
      sale_liters,
      sale_cases,
      state_bottle_cost,
      state_bottle_retail,
      StoreDay
    )
    VALUES (
      source.invoice_line_no,
      source.idProduct,
      source.idSeller,
      source.dateId,
      source.idStore,
      source.sale_bottles,
      source.sale_dollars,
      source.sale_margin,
      source.total_cost,
      source.sale_liters,
      source.sale_cases,
      source.state_bottle_cost,
      source.state_bottle_retail,
      source.StoreDay
    )
  """

  # --- Execution ---
  spark.sql(merge_query)
  print(f"Carga de datos completada en '{gold_fact_table}' mediante MERGE.")

except Exception as e:
    print(f"Error fatal: Fallo durante el MERGE en {gold_fact_table}")
    print(f"Detalle del error: {e}")
    # Chain the original error so the root cause is kept as __cause__.
    raise Exception(f"Error en DML para {gold_fact_table}. Verifique los JOINs con las dimensiones o la lógica del MERGE.") from e