#databricks notebook: GOLD - FireRisk Diário (INPE)

In [0]:
# Databricks notebook: GOLD - daily FireRisk summary (INPE)
# =========================================================

from pyspark.sql import SparkSession
# min/max are imported under spark_* aliases so the Python builtins of the
# same name are not shadowed in the notebook namespace.
from pyspark.sql.functions import (
    col, lit, avg, min as spark_min, max as spark_max, count, current_timestamp 
)

# Obtain the notebook's Spark session (getOrCreate returns the existing
# session when there is one, otherwise builds a new one).
spark = SparkSession.builder.getOrCreate()

In [0]:
# =========================================================
# Parameters
# =========================================================

# Job parameters arrive as Databricks text widgets. Every widget is first
# registered with an empty default and then read back as a string.
nomes_widgets = (
    "catalog",
    "schema_silver",
    "table_silver",
    "schema_gold",
    "table_gold",
    "data_ref_carga",
)

for nome_widget in nomes_widgets:
    dbutils.widgets.text(nome_widget, "")

(
    catalog,
    schema_silver,
    table_silver,
    schema_gold,
    table_gold,
    data_ref_carga,
) = [dbutils.widgets.get(nome_widget) for nome_widget in nomes_widgets]

# The reference load date is mandatory: abort before any table is touched.
if not data_ref_carga:
    raise ValueError("‚ùå Par√¢metro 'data_ref_carga' n√£o informado")

# Echo the resolved source/target tables and the requested date to the job log.
separador = "====================================="
print(separador)
print(f"üì¶ Silver origem : {catalog}.{schema_silver}.{table_silver}")
print(f"üíæ Gold destino  : {catalog}.{schema_gold}.{table_gold}")
print(f"üìÖ Data solicitada: {data_ref_carga or 'N√ÉO INFORMADA'}")
print(separador)

In [0]:
# =========================================================
# Locate the latest valid partition in Silver
# =========================================================

# Distinct load dates currently present in the Silver table.
silver_full = (
    spark.table(f"{catalog}.{schema_silver}.{table_silver}")
    .select("data_ref_carga")
    .distinct()
)

# Rows with a null data_ref_carga are skipped: None cannot be ordered against
# real values (sorted() / <= would raise TypeError) and is never a usable
# partition to aggregate.
datas_disponiveis = [
    r["data_ref_carga"]
    for r in silver_full.collect()
    if r["data_ref_carga"] is not None
]

if not datas_disponiveis:
    raise RuntimeError("‚ùå Nenhuma parti√ß√£o dispon√≠vel na Silver.")

# Ascending order, so the last element is always the most recent date.
datas_ordenadas = sorted(datas_disponiveis)

if data_ref_carga:
    # A date was requested: use the most recent partition that is <= that date.
    # NOTE(review): this compares the collected column values with the widget
    # string directly, so it assumes data_ref_carga is stored as a string in
    # the same format the caller passes in -- confirm against the Silver schema.
    datas_validas = [d for d in datas_ordenadas if d <= data_ref_carga]

    if not datas_validas:
        raise RuntimeError(
            f"‚ùå Nenhuma parti√ß√£o encontrada ‚â§ {data_ref_carga} na Silver."
        )

    data_usada = datas_validas[-1]
else:
    # No date requested: fall back to the most recent partition available.
    data_usada = datas_ordenadas[-1]

print(f"üìå Data utilizada para GOLD: {data_usada}")

In [0]:
# =========================================================
# Read Silver
# =========================================================
# Keep only the rows that belong to the partition chosen for this run.
df_silver = (
    spark.table(f"{catalog}.{schema_silver}.{table_silver}")
    .where(col("data_ref_carga") == data_usada)
)

# count() is an action: it runs the read and tells us how many rows there are.
total_linhas = df_silver.count()
print(f"üî∏ Registros Silver lidos: {total_linhas}")

# An empty partition would yield an empty Gold summary, so stop the job here.
if not total_linhas:
    raise RuntimeError(f"‚ùå Silver vazia para data {data_usada}. Encerrando job.")

In [0]:
# =========================================================
# GOLD aggregation - daily summary
# =========================================================
# Summary statistics computed over the "rf" column, plus the row count.
metricas_rf = [
    avg("rf").alias("rf_medio"),
    spark_min("rf").alias("rf_min"),
    spark_max("rf").alias("rf_max"),
    count("*").alias("qtde_pontos_grade"),
]

# One output row per load date, stamped with the processing time.
df_gold = (
    df_silver.groupBy("data_ref_carga")
    .agg(*metricas_rf)
    .withColumn("processado_em", current_timestamp())
)

print("üî∏ Gold gerada com sucesso")

In [0]:
# =========================================================
# Write Gold (Delta)
# =========================================================
# Selective overwrite: only the rows of the date being written are replaced.
# The predicate must use data_usada (the partition actually read and
# aggregated), not the requested data_ref_carga. When the lookup falls back to
# an earlier partition the two differ, and a predicate built from the requested
# date would not match the rows in df_gold, so Delta would reject the write.
(
    df_gold.write
    .format("delta")
    .mode("overwrite")
    .option("replaceWhere", f"data_ref_carga = '{data_usada}'")
    .saveAsTable(f"{catalog}.{schema_gold}.{table_gold}")
)

print(f"üíæ Gold salva em: {catalog}.{schema_gold}.{table_gold}")

In [0]:
# =========================================================
# GOLD comments (Unity Catalog)
# =========================================================

# Fully qualified name of the Gold table, shared by every statement below.
tabela_gold = f"{catalog}.{schema_gold}.{table_gold}"

# Table-level comment, stored through the 'comment' table property.
spark.sql(f"""
ALTER TABLE {tabela_gold}
SET TBLPROPERTIES (
  'comment' = 'Tabela GOLD - Resumo di√°rio do risco de fogo INPE FireRisk 2.2 (m√©dia, m√≠nimo, m√°ximo, quantidade de pontos).'
)
""")

# Column name -> comment text. Dict insertion order keeps the statements in
# the same order as the columns are listed here.
comentarios_colunas = {
    "data_ref_carga": "Data de refer√™ncia AAAAMMDD",
    "rf_medio": "Valor m√©dio do risco de fogo no dia",
    "rf_min": "Menor valor de risco de fogo no dia",
    "rf_max": "Maior valor de risco de fogo no dia",
    "qtde_pontos_grade": "Quantidade de pixels analisados no grid para o dia",
    "processado_em": "Timestamp de processamento na camada Gold",
}

# One ALTER COLUMN statement per documented column.
for nome_coluna, comentario in comentarios_colunas.items():
    spark.sql(f"""
ALTER TABLE {tabela_gold}
ALTER COLUMN {nome_coluna} COMMENT '{comentario}'
""")

print("üìù Coment√°rios da GOLD adicionados com sucesso! ‚úÖ")
