In [0]:
# NOTEBOOK: 05_Fix_Silver_Deduplication.ipynb
# OBJETIVO: Corrigir a duplicação na Silver Layer

# ===========================================================================
# CORREÇÃO DA SILVER LAYER - VERSÃO FINAL
# ===========================================================================

%sql
USE pl_delivery_analysis;

-- 1. FIRST: baseline quality check on the bronze source tables.
-- The LEFT JOINs can multiply rows whenever deliveries or payments hold more
-- than one row per order_id, so only COUNT(DISTINCT ...) is used here.
-- distinct_deliveries / distinct_payments therefore count the orders that have
-- at least one matching delivery / payment row.
SELECT
  COUNT(DISTINCT ord.order_id) AS distinct_orders,
  COUNT(DISTINCT dlv.order_id) AS distinct_deliveries,
  COUNT(DISTINCT pay.order_id) AS distinct_payments
FROM tbl_fact_orders_bronze AS ord
LEFT JOIN tbl_fact_payments_bronze AS pay
  ON pay.order_id = ord.order_id
LEFT JOIN tbl_fact_deliveries_bronze AS dlv
  ON dlv.order_id = ord.order_id;

-- 2. CHECK: sample of order_ids that have more than one delivery row in bronze.
--   delivery_count    : number of delivery rows found for the order
--   distinct_statuses : number of different delivery_status values among them
-- The ORDER BY makes the 10-row sample reproducible (LIMIT alone returns an
-- arbitrary subset) and surfaces the most heavily duplicated orders first.
SELECT
  order_id,
  COUNT(*) AS delivery_count,
  COUNT(DISTINCT delivery_status) AS distinct_statuses
FROM tbl_fact_deliveries_bronze
GROUP BY order_id
HAVING COUNT(*) > 1
ORDER BY
  delivery_count DESC,
  order_id
LIMIT 10;

-- 3. FIX: rebuild the silver table with at most one delivery row and one
--    payment row per order, then derive the simulated unit-economics columns.
-- Output columns (names and order) are unchanged from the previous version.
-- NOTE(review): DROP followed by CREATE leaves a window in which the table does
-- not exist; if the table format supports it, CREATE OR REPLACE TABLE would
-- avoid that -- confirm before switching.
-- NOTE(review): the orders bronze table itself is not deduplicated here; the
-- verification query in step 4 will reveal it if order_id repeats there.
DROP TABLE IF EXISTS pl_delivery_analysis.tbl_fact_pedidos_silver;

CREATE TABLE pl_delivery_analysis.tbl_fact_pedidos_silver AS
WITH constantes AS (
  -- Simulation parameters, expressed as fractions:
  --   comissao_plataforma : applied to the order subtotal
  --   repasse_entregador  : share of the customer delivery fee passed on as logistics cost
  --   taxa_transacao      : applied to subtotal + delivery fee
  SELECT
    0.18 AS comissao_plataforma,
    0.70 AS repasse_entregador,
    0.02 AS taxa_transacao
),

-- Rank the DELIVERED rows of each order. Only DELIVERED rows are kept, so the
-- status needs no sort key; driver_id is the tie-breaker. NULLS LAST makes a
-- row with a known driver win over a row with a missing driver (ascending
-- order would otherwise place NULLs first).
deliveries_ranked AS (
  SELECT
    order_id,
    driver_id,
    ROW_NUMBER() OVER (
      PARTITION BY order_id
      ORDER BY driver_id ASC NULLS LAST
    ) AS rn
  FROM pl_delivery_analysis.tbl_fact_deliveries_bronze
  WHERE delivery_status = 'DELIVERED'
),

-- Exactly one DELIVERED delivery per order_id.
deliveries_dedup AS (
  SELECT
    order_id,
    driver_id
  FROM deliveries_ranked
  WHERE rn = 1
),

-- Rank the payments of each order by amount, largest first. payment_method is
-- a secondary key so the chosen row is deterministic when amounts tie.
payments_ranked AS (
  SELECT
    order_id,
    payment_method,
    ROW_NUMBER() OVER (
      PARTITION BY order_id
      ORDER BY
        CAST(payment_amount AS DECIMAL(10, 2)) DESC NULLS LAST,
        payment_method ASC NULLS LAST
    ) AS rn
  FROM pl_delivery_analysis.tbl_fact_payments_bronze
),

-- Exactly one payment per order_id (the one with the highest amount).
payments_dedup AS (
  SELECT
    order_id,
    payment_method
  FROM payments_ranked
  WHERE rn = 1
),

-- Orders joined 1:1 with their deduplicated delivery and payment.
-- The INNER JOINs drop orders without a DELIVERED delivery or without any
-- payment; the WHERE drops orders whose subtotal is not strictly positive.
pedidos_limpos AS (
  SELECT
    o.order_id,
    o.store_id,
    d.driver_id,
    p.payment_method,
    CAST(o.subtotal AS DECIMAL(10, 2)) AS subtotal_bruto,
    CAST(o.delivery_fee AS DECIMAL(10, 2)) AS delivery_fee_cliente,
    o.created_at AS created_at_ts_str
  FROM pl_delivery_analysis.tbl_fact_orders_bronze AS o
  INNER JOIN deliveries_dedup AS d
    ON o.order_id = d.order_id
  INNER JOIN payments_dedup AS p
    ON o.order_id = p.order_id
  WHERE CAST(o.subtotal AS DECIMAL(10, 2)) > 0
),

-- First-level metrics. They are computed in their own CTE so that the final
-- SELECT never references an alias defined in the same SELECT list, which
-- only resolves on engines that support lateral column aliases.
pedidos_metricas AS (
  SELECT
    p.order_id,
    p.store_id,
    p.driver_id,
    p.payment_method,
    p.subtotal_bruto,
    p.delivery_fee_cliente,
    p.created_at_ts_str,
    (p.subtotal_bruto + p.delivery_fee_cliente) AS gmv_total,
    (p.subtotal_bruto * c.comissao_plataforma) AS receita_comissao,
    -- Part of the delivery fee that is not passed on to the driver
    (p.delivery_fee_cliente * (1 - c.repasse_entregador)) AS receita_frete_retida,
    (p.delivery_fee_cliente * c.repasse_entregador) AS cogs_logistico_simulado,
    ((p.subtotal_bruto + p.delivery_fee_cliente) * c.taxa_transacao) AS cogs_transacao_simulado
  FROM pedidos_limpos AS p
  CROSS JOIN constantes AS c
)

SELECT
  m.order_id,
  m.store_id,
  m.driver_id,
  m.payment_method,
  m.subtotal_bruto,
  m.delivery_fee_cliente,
  m.created_at_ts_str,
  -- Revenue metrics
  m.gmv_total,
  m.receita_comissao,
  (m.receita_comissao + m.receita_frete_retida) AS receita_liquida_plataforma,
  -- Cost metrics
  m.cogs_logistico_simulado,
  m.cogs_transacao_simulado,
  -- Unit economics: net platform revenue minus both simulated costs
  (
    (m.receita_comissao + m.receita_frete_retida)
    - m.cogs_logistico_simulado
    - m.cogs_transacao_simulado
  ) AS lucro_bruto_unitario
FROM pedidos_metricas AS m;

-- 4. VERIFICATION of the fix.
-- total_registros equals orders_unicos when the rebuilt silver table holds
-- exactly one row per order_id; margem_media is the average of
-- lucro_bruto_unitario across all rows.
WITH silver AS (
  SELECT
    order_id,
    lucro_bruto_unitario
  FROM pl_delivery_analysis.tbl_fact_pedidos_silver
)
SELECT
  'Silver Layer Corrigida' AS etapa,
  COUNT(*) AS total_registros,
  COUNT(DISTINCT silver.order_id) AS orders_unicos,
  AVG(silver.lucro_bruto_unitario) AS margem_media
FROM silver;