# Databricks notebook: Bronze - Risco de Fogo (Diário)

In [0]:
# Databricks notebook: Bronze - INPE Fire Risk (Diário)
# ================================================================
# ================================================================
# Imports
# ================================================================
from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
from datetime import datetime
import rasterio
import numpy as np
import pandas as pd
import os

# Obtain the Spark session used by the rest of the notebook: getOrCreate()
# returns the already-active session when there is one, else builds a new one.
spark = SparkSession.builder.getOrCreate()

In [0]:
# ================================================================
# Parameters
# ================================================================
# Every job parameter is declared as a text widget whose default is the empty
# string, so a parameter that was not supplied is read back as "".
dbutils.widgets.text("catalog", "")
dbutils.widgets.text("schema", "")
dbutils.widgets.text("table", "")
dbutils.widgets.text("path_raw", "")
dbutils.widgets.text("data_ref_carga", "")

catalog         = dbutils.widgets.get("catalog")
schema          = dbutils.widgets.get("schema")
table           = dbutils.widgets.get("table")
path_raw        = dbutils.widgets.get("path_raw")
data_ref_carga  = dbutils.widgets.get("data_ref_carga")

# Fail fast on ANY missing parameter, not only the reference date: an empty
# catalog/schema/table would otherwise surface only at write time (as the
# identifier ".."), after the raster has already been read and converted, and
# an empty path_raw would fail inside the file listing with an unrelated error.
# data_ref_carga is checked first so its error is raised before the others.
for _param_name, _param_value in (
    ("data_ref_carga", data_ref_carga),
    ("catalog", catalog),
    ("schema", schema),
    ("table", table),
    ("path_raw", path_raw),
):
    if not _param_value.strip():
        raise ValueError(f"‚ùå Par√¢metro '{_param_name}' n√£o informado")

# Echo the effective configuration into the run output.
print("============================================")
print(f"üìÖ Data refer√™ncia: {data_ref_carga}")
print(f"üì¶ Destino: {catalog}.{schema}.{table}")
print(f"üìÇ RAW: {path_raw}")
print("============================================")

In [0]:
# ================================================================
# Find the most recent file dated <= data_ref_carga
# ================================================================
# Names of every entry listed directly under the raw folder.
files = [entry.name for entry in dbutils.fs.ls(path_raw)]

# Keep only the names that carry the fire-risk prefix and the .nc extension.
files_nc = list(
    filter(
        lambda name: name.startswith("INPE_FireRiskModel_2.2_FireRisk_")
        and name.endswith(".nc"),
        files,
    )
)

def extract_date(file):
    """Return the date embedded in a fire-risk file name as an integer.

    Every occurrence of the ``INPE_FireRiskModel_2.2_FireRisk_`` marker and of
    ``.nc`` is removed from ``file``; whatever text is left is converted with
    ``int``.

    Args:
        file: File name, e.g. ``INPE_FireRiskModel_2.2_FireRisk_20240131.nc``.

    Returns:
        The remaining text parsed as an integer (``20240131`` for the example).

    Raises:
        ValueError: If the remaining text is not a valid integer literal.
    """
    without_prefix = file.replace("INPE_FireRiskModel_2.2_FireRisk_", "")
    date_text = without_prefix.replace(".nc", "")
    return int(date_text)

# Normalise the reference date to an integer YYYYMMDD so it can be compared
# with the integers produced by extract_date(). Parsing with datetime (rather
# than just deleting the dashes) rejects impossible dates and keeps
# non-zero-padded input such as "2024-1-5" from collapsing to 202415, which
# would compare as older than every real file. A compact 8-digit value
# (YYYYMMDD) is still accepted, as it was before.
_ref_text = data_ref_carga.strip()
_ref_format = "%Y%m%d" if (len(_ref_text) == 8 and _ref_text.isdigit()) else "%Y-%m-%d"
try:
    _ref_date = datetime.strptime(_ref_text, _ref_format)
except ValueError as exc:
    raise ValueError(
        f"Parâmetro 'data_ref_carga' inválido: {data_ref_carga!r} "
        "(esperado AAAA-MM-DD)"
    ) from exc
target_date = _ref_date.year * 10000 + _ref_date.month * 100 + _ref_date.day

# Parse each candidate name exactly once. A name whose date part cannot be
# converted (extract_date raises ValueError) is reported and skipped, so a
# single stray file in the raw folder does not abort the whole load.
valid_files = []
for candidate_name in files_nc:
    try:
        candidate_date = extract_date(candidate_name)
    except ValueError:
        print(f"Ignorando arquivo com data inválida no nome: {candidate_name}")
        continue
    if candidate_date <= target_date:
        valid_files.append((candidate_name, candidate_date))

if not valid_files:
    raise FileNotFoundError(f"‚ö† Nenhum arquivo ‚â§ {data_ref_carga}")

# Newest eligible file. max() keeps the first entry among equal dates, which is
# the same pick a stable descending sort followed by [0] would make.
selected_file = max(valid_files, key=lambda item: item[1])[0]

# Drop any trailing slash on the folder so the joined path has a single "/".
file_path = f"{path_raw.rstrip('/')}/{selected_file}"

print(f"‚úÖ Selecionado: {selected_file}")

In [0]:
# ================================================================
# Copy the file to local disk (/local_disk0)
# ================================================================
# The read step below opens the file through a plain local path, so the
# selected file is first copied out of the raw location.
local_path = f"/local_disk0/{selected_file}"

# Prepend "dbfs:" only when the path carries no scheme of its own. A path_raw
# given as "dbfs:/..." (or another scheme such as "abfss://...") is accepted by
# the listing step, but blindly prefixing it would produce "dbfs:dbfs:/...".
_has_scheme = ":" in file_path.split("/", 1)[0]
source_uri = file_path if _has_scheme else f"dbfs:{file_path}"

print("üìÅ Copiando arquivo para disco local...")
dbutils.fs.cp(source_uri, f"file:{local_path}")

# Check once that the copy actually landed, report it, and stop if it did not.
copied_ok = os.path.exists(local_path)
print("Arquivo existe local?", copied_ok)
if not copied_ok:
    raise RuntimeError("‚ùå Falha ao copiar arquivo para /local_disk0")

In [0]:
# ================================================================
# Read the NetCDF with rasterio (ONLY from /local_disk0)
# ================================================================
print(f"üìñ Lendo arquivo via rasterio: {local_path}")

try:
    with rasterio.open(local_path) as src:
        arr = src.read(1)  # band 1
        transform = src.transform

        # Row/column index of every pixel, converted to x/y coordinates with
        # the dataset's transform.
        rows, cols = np.indices(arr.shape)
        xs, ys = rasterio.transform.xy(transform, rows, cols)

        # One record per pixel: y -> lat, x -> lon, pixel value -> rf.
        df_pandas = pd.DataFrame({
            "lat": np.array(ys).flatten(),
            "lon": np.array(xs).flatten(),
            "rf": arr.flatten()
        })

except Exception as e:
    # Chain explicitly so the underlying error stays attached as __cause__
    # and its traceback is reported as the direct cause of this failure.
    raise RuntimeError(f"‚ùå Erro lendo NetCDF com rasterio: {e}") from e

# Replace NaN with None. No rows are dropped here; the NaN values are only
# swapped for None (presumably so they reach Spark as nulls - confirm).
df_pandas = df_pandas.replace({np.nan: None})

print(f"üìä Linhas carregadas: {len(df_pandas)}")

In [0]:
# Convert the pandas frame to a Spark DataFrame and stamp every row with the
# load reference date as a literal column.
df = spark.createDataFrame(df_pandas).withColumn(
    "data_ref_carga", lit(data_ref_carga)
)

In [0]:
# ================================================================
# Bronze write
# ================================================================
# Destination table and the predicate handed to the replaceWhere option; the
# predicate selects the rows of this run's reference date.
target_table = f"{catalog}.{schema}.{table}"
partition_predicate = f"data_ref_carga = '{data_ref_carga}'"

# Delta writer in overwrite mode, partitioned by the reference date.
bronze_writer = df.write.format("delta").mode("overwrite")
bronze_writer = bronze_writer.option("replaceWhere", partition_predicate)
bronze_writer = bronze_writer.partitionBy("data_ref_carga")
bronze_writer.saveAsTable(target_table)

print("üöÄ Bronze finalizado com sucesso!")