# Databricks notebook: Silver - FireRisk Diário (INPE)

In [0]:
# =========================================================
# Converts NetCDF data (Bronze) into a standardized Silver table
# =========================================================

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, when, lit, to_timestamp
# Aliased so the Spark aggregate does not shadow Python's builtin max().
from pyspark.sql.functions import  max as spark_max
# Obtain the Spark session used by every cell below (getOrCreate returns the
# active session when one already exists).
spark = SparkSession.builder.getOrCreate()

In [0]:
# =========================================================
# Job parameters
# =========================================================
# Every parameter is exposed as a text widget with an empty default, then read
# back as a string. All six are required: five of them are interpolated into
# three-part table names and the sixth selects the load date.
_NOMES_PARAMETROS = (
    "catalog",
    "schema_bronze",
    "table_bronze",
    "schema_silver",
    "table_silver",
    "data_ref_carga",
)

for _nome in _NOMES_PARAMETROS:
    dbutils.widgets.text(_nome, "")

_parametros = {_nome: dbutils.widgets.get(_nome) for _nome in _NOMES_PARAMETROS}

catalog         = _parametros["catalog"]
schema_bronze   = _parametros["schema_bronze"]
table_bronze    = _parametros["table_bronze"]
schema_silver   = _parametros["schema_silver"]
table_silver    = _parametros["table_silver"]
data_ref_carga  = _parametros["data_ref_carga"]

# Raises ValueError when the load date was not supplied.
if not data_ref_carga:
    raise ValueError("‚ùå Par√¢metro 'data_ref_carga' n√£o informado")

# An empty catalog/schema/table would produce a malformed identifier such as
# "..tabela" and only fail later inside Spark with a much less helpful error,
# so reject it here and name the offending parameter(s).
_faltando = [_nome for _nome, _valor in _parametros.items() if not _valor.strip()]
if _faltando:
    raise ValueError(
        f"Parâmetros obrigatórios não informados: {', '.join(_faltando)}"
    )

# Echo the effective configuration to the job log.
print("=====================================")
print(f"üìÖ Data de refer√™ncia : {data_ref_carga}")
print(f"üì¶ Bronze origem      : {catalog}.{schema_bronze}.{table_bronze}")
print(f"üíæ Silver destino     : {catalog}.{schema_silver}.{table_silver}")
print("=====================================")

In [0]:
# Fully qualified (catalog.schema.table) name of the Bronze source table.
bronze_path = ".".join((catalog, schema_bronze, table_bronze))

In [0]:
# =========================================================
# 1. Check whether a partition exists for data_ref_carga
# =========================================================
# Distinct data_ref_carga values currently present in the Bronze table.
df_particoes = spark.table(bronze_path).select("data_ref_carga").distinct()

# True when the requested date is one of them.
particao_existe = (
    df_particoes.where(col("data_ref_carga") == data_ref_carga).count() > 0
)

if not particao_existe:
    # =========================================================
    # 2. Fall back to the most recent partition available
    # =========================================================
    # A global aggregate yields exactly one row; its value is None when
    # there is no non-null data_ref_carga to take the maximum of.
    ultima_particao = (
        df_particoes
        .agg(spark_max("data_ref_carga").alias("max"))
        .first()["max"]
    )

    # Raises ValueError when the Bronze table offers nothing to fall back to.
    if ultima_particao is None:
        raise ValueError("‚ùå Nenhuma parti√ß√£o encontrada na tabela Bronze.")

    print(f"‚ö†Ô∏è Parti√ß√£o {data_ref_carga} n√£o encontrada.")
    print(f"‚û°Ô∏è Usando a mais recente dispon√≠vel: {ultima_particao}")

    data_ref_final = ultima_particao
else:
    # The requested partition is present: read it as-is.
    print(f"‚úÖ Usando parti√ß√£o solicitada: {data_ref_carga}")
    data_ref_final = data_ref_carga

In [0]:
# =========================================================
# 3. Final Bronze read, restricted to the chosen partition
# =========================================================
_filtro_particao = col("data_ref_carga") == data_ref_final
df_bronze = spark.table(bronze_path).where(_filtro_particao)

# count() is an action: it executes the read in order to report the total.
_linhas_bronze = df_bronze.count()
print(f"üìå Linhas carregadas: {_linhas_bronze}")

In [0]:
# =========================================================
# Cleaning and enrichment
# =========================================================
# Imported locally so this cell stays runnable without touching the import cell.
from pyspark.sql.functions import isnan

_rf = col("rf")

df_silver = (
    df_bronze
    # Drop grid points that carry no usable value. NaN has to be excluded
    # explicitly: isNotNull() lets it through, and Spark orders NaN above every
    # number, so each `<` test below would be false and the point would be
    # labelled "extremo".
    .filter(_rf.isNotNull() & ~isnan(_rf))
    # Bucket rf into a textual risk level; the first matching threshold wins.
    .withColumn(
        "nivel_risco",
        when(_rf < 0.2, "baixo")
        .when(_rf < 0.4, "moderado")
        .when(_rf < 0.6, "alto")
        .when(_rf < 0.8, "muito alto")
        .otherwise("extremo"),
    )
    # Stamp every row with the REQUESTED date (data_ref_carga), overwriting
    # whatever value the rows were read with.
    # NOTE(review): when the read used a different (fallback) partition, that
    # partition's rows end up labelled with the requested date - confirm this
    # relabelling is intended.
    .withColumn("data_ref_carga", lit(data_ref_carga))
)

# count() is an action: it runs the pipeline once just to report the total.
print(f"‚úÖ Linhas ap√≥s limpeza: {df_silver.count()}")

In [0]:
# =========================================================
# Write to the Silver table (Delta)
# =========================================================
_tabela_silver = f"{catalog}.{schema_silver}.{table_silver}"

# Predicate handed to Delta's replaceWhere so the overwrite is scoped to the
# rows of this load date instead of the whole table.
_condicao_substituicao = f"data_ref_carga = '{data_ref_carga}'"

_escritor = df_silver.write.mode("overwrite").format("delta")
_escritor = _escritor.partitionBy("data_ref_carga")
_escritor = _escritor.option("replaceWhere", _condicao_substituicao)
_escritor.saveAsTable(_tabela_silver)

print(f"üíæ Dados gravados em: {_tabela_silver}")

In [0]:
# =========================================================
# Add descriptions (comments) to the Silver table
# =========================================================
_tabela_comentada = f"{catalog}.{schema_silver}.{table_silver}"

# Table-level comment, stored through the 'comment' table property.
spark.sql(f"""
ALTER TABLE {_tabela_comentada}
SET TBLPROPERTIES ('comment' = 'Tabela Silver do modelo INPE Fire Risk 2.2. Representa o risco di√°rio de fogo em grade (latitude, longitude).')
""")

# Column-level comments, applied one ALTER COLUMN statement per column in the
# order listed here.
_comentarios_colunas = {
    "lat": "Latitude em graus decimais",
    "lon": "Longitude em graus decimais",
    "rf": "√çndice de risco de fogo (0 a 1)",
    "nivel_risco": "Classifica√ß√£o textual do risco de fogo",
    "data_ref_carga": "Data de refer√™ncia do processamento (AAAAMMDD)",
}

for _coluna, _descricao in _comentarios_colunas.items():
    spark.sql(f"""
ALTER TABLE {_tabela_comentada}
ALTER COLUMN {_coluna} COMMENT '{_descricao}'
""")

print("üìù Coment√°rios adicionados com sucesso no Unity Catalog! ‚úÖ")

