# Databricks notebook: Bronze - Risco de Fogo (Diário)

In [0]:
# ================================================================
# Databricks notebook: Bronze - INPE Fire Risk (Diário)
# ================================================================
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
from datetime import datetime
import xarray as xr
import pandas as pd
import os

# Spark session used by this notebook; getOrCreate() returns the already
# active session when there is one instead of building a second one.
spark = SparkSession.builder.getOrCreate()

In [0]:
# ================================================================
# Parameters received from the Databricks Job
# ================================================================
# Every job parameter is exposed as a text widget whose default is "".
_WIDGET_NAMES = ("catalog", "schema", "table", "path_raw", "data_ref_carga")

for _widget_name in _WIDGET_NAMES:
    dbutils.widgets.text(_widget_name, "")

# Read the values back in the same order the widgets were declared.
catalog, schema, table, path_raw, data_ref_carga = (
    dbutils.widgets.get(_widget_name) for _widget_name in _WIDGET_NAMES
)

In [0]:
# ================================================================
# Parameter validation
# ================================================================
# Fail fast, before any storage access: every widget value is required,
# and the reference date must be a real calendar date in canonical form.


def _is_valid_reference_date(value):
    """Return True when *value* is a date written as YYYY-MM-DD or YYYYMMDD.

    The parsed date is formatted back and compared with the input so that
    non-canonical spellings (missing zero padding, stray characters) are
    rejected as well.
    """
    for date_format in ("%Y-%m-%d", "%Y%m%d"):
        try:
            parsed = datetime.strptime(value, date_format)
        except ValueError:
            continue
        if parsed.strftime(date_format) == value:
            return True
    return False


if not data_ref_carga:
    raise ValueError("‚ùå Par√¢metro 'data_ref_carga' n√£o informado")

if not _is_valid_reference_date(data_ref_carga):
    raise ValueError(
        f"Parâmetro 'data_ref_carga' inválido: {data_ref_carga!r} "
        "(esperado YYYY-MM-DD ou YYYYMMDD)"
    )

# The remaining widgets default to "", so an omitted job parameter would
# otherwise only surface later as an obscure path or table-name error.
_missing_params = [
    _name
    for _name, _value in (
        ("catalog", catalog),
        ("schema", schema),
        ("table", table),
        ("path_raw", path_raw),
    )
    if not _value.strip()
]
if _missing_params:
    raise ValueError(
        "Parâmetro(s) obrigatório(s) não informado(s): "
        + ", ".join(_missing_params)
    )

print("============================================")
print(f"üìÖ Data de refer√™ncia: {data_ref_carga}")
print(f"üì¶ Cat√°logo destino  : {catalog}.{schema}.{table}")
print(f"üìÇ Caminho RAW base  : {path_raw}")
print("============================================")

In [0]:
# ================================================================
# Build the name and path of the expected NetCDF (.nc) file
# ================================================================
# Drop every dash from the reference date (e.g. "2024-01-31" -> "20240131").
data_ref_fmt = "".join(data_ref_carga.split("-"))
file_name = f"INPE_FireRiskModel_2.2_FireRisk_{data_ref_fmt}.nc"

# The file is expected directly under the RAW base path.
file_path = "/".join((path_raw, file_name))

print(f"üîé Procurando arquivo: {file_path}")

In [0]:
# ================================================================
# Check that the expected file exists in the volume
# ================================================================
try:
    # Exact name match: a substring test would also accept look-alikes
    # (e.g. "<file_name>.bak") that are not the file file_path points to.
    files = [f.path for f in dbutils.fs.ls(path_raw) if f.name == file_name]
except Exception as e:
    # Same exception type as before, now chained so the underlying
    # listing error and its traceback are not lost.
    raise FileNotFoundError(f"‚ùå Erro ao acessar o volume RAW: {e}") from e

if not files:
    raise FileNotFoundError(f"‚ö†Ô∏è Nenhum arquivo NetCDF encontrado para {data_ref_carga}")

print(f"‚úÖ Arquivo encontrado: {files[0]}")

In [0]:
# ================================================================
# Read the NetCDF file (copy to /tmp, then open with xarray)
# ================================================================
local_tmp = f"/tmp/{file_name}"

try:
    print(f"üìÇ Copiando para leitura local: {local_tmp}")
    dbutils.fs.cp(file_path, f"file:{local_tmp}")

    if not os.path.exists(local_tmp):
        raise FileNotFoundError(f"Arquivo n√£o copiado corretamente: {local_tmp}")

    print(f"üìñ Lendo arquivo NetCDF: {local_tmp}")
    # Context manager closes the dataset (and its file handle) as soon as
    # the data has been materialized into pandas.
    with xr.open_dataset(local_tmp) as ds:
        df_pandas = ds.to_dataframe().reset_index()

    if df_pandas.empty:
        raise ValueError("‚ö†Ô∏è Arquivo lido, mas sem dados (DataFrame vazio).")

    # Convert to a Spark DataFrame and tag every row with the load date.
    df = spark.createDataFrame(df_pandas)
    df = df.withColumn("data_ref_carga", lit(data_ref_carga))

    print(f"‚úÖ Linhas lidas: {df.count()}")

except Exception as e:
    # Callers still see RuntimeError; chaining keeps the original traceback.
    raise RuntimeError(f"‚ùå Erro ao ler arquivo NetCDF: {e}") from e
finally:
    # Best-effort cleanup of the local copy so repeated runs do not
    # accumulate files in /tmp; a cleanup failure must not mask the
    # primary error (or a successful read).
    try:
        os.remove(local_tmp)
    except OSError:
        pass

In [0]:
# ================================================================
# Write to the Bronze table
# ================================================================
# Overwrite only the rows of the reference date being loaded; the table
# is partitioned by that same column.
_target_table = f"{catalog}.{schema}.{table}"
_replace_predicate = f"data_ref_carga = '{data_ref_carga}'"

_writer = df.write.format("delta")
_writer = _writer.mode("overwrite")
_writer = _writer.option("replaceWhere", _replace_predicate)
_writer = _writer.partitionBy("data_ref_carga")
_writer.saveAsTable(_target_table)

print(f"üíæ Dados gravados em: {catalog}.{schema}.{table}")
print("üöÄ Job Bronze finalizado com sucesso!")