# In [0]:
# Notebook: sql/03_gold_dimensional_model/03_gold_model.ipynb

# Initial configuration.
# These values feed the final log entry written in the `finally` clause:
# the status/detail start out as the success values and are overwritten by
# the `except` handler; the counters stay at 0 for any table that was not
# (re)built before a failure.
processo_nome = 'gold_dimensional_model'
status_final = 'sucesso'
detalhes_log = 'Modelagem Gold concluída.'
count_fact = 0
count_store = 0
count_time = 0

try:
    # 1. START LOG: record that the Gold modelling run has begun.
    spark.sql(f"""
    INSERT INTO pl_delivery_analysis.log_processamento (processo, etapa, status, timestamp, detalhes)
    VALUES ('{processo_nome}', 'inicio', 'executando', CURRENT_TIMESTAMP(), 'Iniciando modelagem dimensional Gold');
    """)

    # 2. DATE CONVERSION FUNCTION (SQL UDF)
    # Parses strings in the 'M/d/yyyy h:mm:ss a' layout and returns the DATE
    # part; used below for both the time dimension and the fact table key.
    spark.sql("""
    CREATE OR REPLACE FUNCTION pl_delivery_analysis.convert_datetime_to_date(datetime_str STRING)
    RETURNS DATE
    RETURN TO_DATE(
        TO_TIMESTAMP(datetime_str, 'M/d/yyyy h:mm:ss a')
    );
    """)

    # 3. STORE DIMENSION (stores + hubs)
    # LEFT JOIN keeps every bronze store row even when its hub_id has no
    # match in the hubs table (city is NULL in that case).
    spark.sql("""
    CREATE OR REPLACE TABLE pl_delivery_analysis.tbl_dim_store_gold AS
    SELECT 
        s.store_id,
        s.store_segment,
        s.hub_id,
        h.city
    FROM pl_delivery_analysis.tbl_dim_stores_bronze s
    LEFT JOIN pl_delivery_analysis.tbl_dim_hubs_bronze h ON s.hub_id = h.hub_id;
    """)
    count_store = spark.sql("SELECT COUNT(*) as total FROM pl_delivery_analysis.tbl_dim_store_gold").collect()[0]['total']

    # 4. TIME DIMENSION
    # One row per distinct, non-NULL order date found in the Silver orders
    # table, with year/month/day/weekday/quarter attributes derived from it.
    # NOTE(review): Spark's WEEKDAY is documented as 0 = Monday .. 6 = Sunday;
    # confirm that is the numbering the reports expect.
    spark.sql("""
    CREATE OR REPLACE TABLE pl_delivery_analysis.tbl_dim_time_gold AS
    SELECT DISTINCT
        d.date_key,
        YEAR(d.date_key) AS year,
        MONTH(d.date_key) AS month,
        DAYOFMONTH(d.date_key) AS day,
        WEEKDAY(d.date_key) AS day_of_week_num,
        CASE 
            WHEN MONTH(d.date_key) IN (1, 2, 3) THEN 'Q1'
            WHEN MONTH(d.date_key) IN (4, 5, 6) THEN 'Q2'
            WHEN MONTH(d.date_key) IN (7, 8, 9) THEN 'Q3'
            ELSE 'Q4'
        END AS quarter
    FROM (
        SELECT
            pl_delivery_analysis.convert_datetime_to_date(created_at_ts_str) AS date_key
        FROM pl_delivery_analysis.tbl_fact_pedidos_silver
    ) d
    WHERE d.date_key IS NOT NULL;
    """)
    count_time = spark.sql("SELECT COUNT(*) as total FROM pl_delivery_analysis.tbl_dim_time_gold").collect()[0]['total']

    # 5. FACT TABLE (delivery fact)
    # Copies the order-level measures from Silver and adds order_date_key,
    # computed with the same conversion function as the time dimension.
    spark.sql("""
    CREATE OR REPLACE TABLE pl_delivery_analysis.tbl_fato_delivery_gold AS
    SELECT
        p.order_id,
        p.store_id,
        p.driver_id,
        p.payment_method,
        pl_delivery_analysis.convert_datetime_to_date(p.created_at_ts_str) AS order_date_key,
        p.subtotal_bruto,
        p.delivery_fee_cliente,
        p.gmv_total,
        p.receita_liquida_plataforma,
        p.cogs_logistico_simulado,
        p.cogs_transacao_simulado,
        p.lucro_bruto_unitario
    FROM
        pl_delivery_analysis.tbl_fact_pedidos_silver p;
    """)

    # Z-ORDER the fact table by date and store, intended to speed up
    # Power BI queries.
    spark.sql("""
    OPTIMIZE pl_delivery_analysis.tbl_fato_delivery_gold
    ZORDER BY (order_date_key, store_id);
    """)

    count_fact = spark.sql("SELECT COUNT(*) as total FROM pl_delivery_analysis.tbl_fato_delivery_gold").collect()[0]['total']

    # 6. TABLE COMMENT (documentation stored with the table metadata)
    spark.sql("""
    COMMENT ON TABLE pl_delivery_analysis.tbl_fato_delivery_gold IS 'Tabela fato principal com métricas de Unit Economics. ZORDERed por data e loja.'
    """)

except Exception as e:
    status_final = 'falha'
    # Single quotes are stripped because the message is later interpolated
    # into a quoted SQL string literal in the final log INSERT.
    erro_msg = str(e).replace("'", "")
    detalhes_log = f"Erro na modelagem Gold: {erro_msg}"
    print(f"ERRO CRÍTICO: {detalhes_log}")
    # Bare `raise` re-raises the active exception with its traceback intact.
    raise

finally:
    # 7. END LOG: always attempt to record the outcome and the row counts.
    msg_final = f"{detalhes_log} | Fatos: {count_fact}, Lojas: {count_store}, Datas: {count_time}"

    try:
        spark.sql(f"""
    INSERT INTO pl_delivery_analysis.log_processamento (processo, etapa, status, timestamp, detalhes)
    VALUES ('{processo_nome}', 'fim', '{status_final}', CURRENT_TIMESTAMP(), '{msg_final}');
    """)
    except Exception as log_error:
        # When the pipeline itself succeeded, a failed log write is still
        # surfaced exactly as before. When the pipeline already failed, the
        # logging error must not replace the original exception, so it is
        # only reported and the original error keeps propagating.
        if status_final == 'sucesso':
            raise
        print(f"AVISO: falha ao gravar log final: {log_error}")

    print(msg_final)