#databricks notebook: GOLD - FireRisk Diário (INPE)

In [0]:
# Databricks notebook: GOLD - FireRisk Diário (INPE)
# =========================================================

from pyspark.sql import SparkSession
from pyspark.sql.functions import (
    avg,
    col,
    count,
    current_timestamp,
    lit,
    max as spark_max,
    min as spark_min,
)

# Obtain the Spark session used by every cell below; getOrCreate() returns the
# existing session when there is one instead of building a second one.
spark = SparkSession.builder.getOrCreate()

In [0]:
# =========================================================
# Parameters
# =========================================================
# Every widget is required: the table identifiers used by the notebook are
# built by joining these values with ".", so a blank value would otherwise only
# surface later as an obscure table-resolution error.
_WIDGET_NAMES = (
    "catalog",
    "schema_silver",
    "table_silver",
    "schema_gold",
    "table_gold",
    "data_ref_carga",
)

# Declare all widgets before reading any of them, so an interactive run shows
# the complete set of inputs even when validation fails on the first one.
for _widget_name in _WIDGET_NAMES:
    dbutils.widgets.text(_widget_name, "")


def _required_widget(name: str) -> str:
    """Return the value of text widget ``name`` with surrounding whitespace removed.

    Raises:
        ValueError: if the widget is empty or contains only whitespace.
    """
    value = dbutils.widgets.get(name).strip()
    if not value:
        raise ValueError(f"‚ùå Par√¢metro '{name}' n√£o informado")
    return value


catalog         = _required_widget("catalog")
schema_silver   = _required_widget("schema_silver")
table_silver    = _required_widget("table_silver")
schema_gold     = _required_widget("schema_gold")
table_gold      = _required_widget("table_gold")
data_ref_carga  = _required_widget("data_ref_carga")

print("=====================================")
print(f"üìÖ Data refer√™ncia: {data_ref_carga}")
print(f"üì¶ Silver origem : {catalog}.{schema_silver}.{table_silver}")
print(f"üíæ Gold destino  : {catalog}.{schema_gold}.{table_gold}")
print("=====================================")

In [0]:
# =========================================================
# Read Silver
# =========================================================
# Keep only the rows that belong to the requested reference date.
silver_table_name = f"{catalog}.{schema_silver}.{table_silver}"
is_reference_date = col("data_ref_carga") == data_ref_carga
df_silver = spark.table(silver_table_name).where(is_reference_date)

# The row count is both logged and used as the emptiness check below.
total_linhas = df_silver.count()
print(f"üî∏ Registros Silver lidos: {total_linhas}")

# Stop the job when there is nothing to aggregate for this date.
if not total_linhas:
    raise RuntimeError("‚ùå Silver vazia para esta data. Encerrando job.")

In [0]:
# =========================================================
# GOLD aggregation - daily summary (Brazil)
# =========================================================
# Collapse the Silver rows into one summary row per data_ref_carga:
# mean / min / max of `rf` plus the number of rows in the group.
df_gold = (
    df_silver
    .groupBy("data_ref_carga")
    .agg(
        avg("rf").alias("rf_medio"),
        spark_min("rf").alias("rf_min"),
        spark_max("rf").alias("rf_max"),
        count("*").alias("qtde_pontos_grade"),
    )
    # current_timestamp() already yields a Column, so it is passed directly
    # rather than wrapped in lit().
    .withColumn("processado_em", current_timestamp())
)

print("üî∏ Gold gerada com sucesso")

In [0]:
# =========================================================
# Write Gold (Delta)
# =========================================================
# The overwrite is restricted by replaceWhere to the rows of the reference
# date being processed.
gold_table_name = f"{catalog}.{schema_gold}.{table_gold}"
replace_predicate = f"data_ref_carga = '{data_ref_carga}'"

gold_writer = df_gold.write.format("delta").mode("overwrite")
gold_writer = gold_writer.option("replaceWhere", replace_predicate)
gold_writer.saveAsTable(gold_table_name)

print(f"üíæ Gold salva em: {catalog}.{schema_gold}.{table_gold}")

In [0]:
# =========================================================
# GOLD table and column comments
# =========================================================
# spark.sql() parses a single statement per call, so every comment is issued
# as its own statement instead of one ';'-separated script.
gold_table = f"{catalog}.{schema_gold}.{table_gold}"

# Descriptions are stored as table metadata, so they are written with proper
# Portuguese accents rather than mis-encoded characters.
table_comment = (
    "Tabela GOLD - Resumo diário do risco de fogo INPE FireRisk 2.2 "
    "(média, mínimo, máximo, quantidade de pontos)."
)

column_comments = {
    "data_ref_carga": "Data de referência AAAAMMDD",
    "rf_medio": "Valor médio do risco de fogo no dia",
    "rf_min": "Menor valor de risco de fogo no dia",
    "rf_max": "Maior valor de risco de fogo no dia",
    "qtde_pontos_grade": "Quantidade de pixels analisados no grid para o dia",
    "processado_em": "Timestamp de processamento na camada Gold",
}

spark.sql(f"COMMENT ON TABLE {gold_table} IS '{table_comment}'")

# ALTER TABLE ... ALTER COLUMN ... COMMENT is used for the columns because it
# does not depend on runtime support for the newer COMMENT ON COLUMN form.
for column_name, column_comment in column_comments.items():
    spark.sql(
        f"ALTER TABLE {gold_table} ALTER COLUMN {column_name} "
        f"COMMENT '{column_comment}'"
    )

print("üìù Coment√°rios adicionados com sucesso!")