In [0]:
from datetime import datetime
import requests
import os
from pyspark.sql import SparkSession

In [0]:
# =====================================
# SPARK INITIALIZATION
# =====================================
# getOrCreate() returns the active SparkSession when one exists and
# creates a new one otherwise.
spark = (
    SparkSession.builder
    .getOrCreate()
)

In [0]:
# =====================================
# JOB PARAMETERS (Widgets)
# =====================================
# Each entry is (widget name, default value, label). Every default is the
# empty string, which later cells treat as "parameter not provided".
_WIDGET_SPECS = (
    ("data_ref_carga", "", "Data de Carga (AAAA-MM-DD)"),
    ("destino", "", "Destino no Volume (ex: /Volumes/datamasters/raw/raw_inpe)"),
    ("url_csv", "", "URL base do CSV (opcional)"),
)

# Register the text widgets in the same order as they are declared above.
for _widget_name, _widget_default, _widget_label in _WIDGET_SPECS:
    dbutils.widgets.text(_widget_name, _widget_default, _widget_label)

In [0]:
# Read the job parameters from the widgets.
# `or ""` normalizes a falsy value to an empty string, and `.strip()` removes
# accidental leading/trailing whitespace so that a blank-but-padded value is
# still recognized as "not provided" by the `if not ...` checks further down.
data_ref_carga = (dbutils.widgets.get("data_ref_carga") or "").strip()
destino_base = (dbutils.widgets.get("destino") or "").strip()
url_csv = (dbutils.widgets.get("url_csv") or "").strip()

In [0]:
# =====================================
# VALIDATION AND FORMATS
# =====================================
# Ignore accidental padding around the supplied value.
data_ref_carga = data_ref_carga.strip()

# When no date is supplied, fall back to today's date.
# NOTE(review): datetime.now() is timezone-naive and follows the local clock
# of the machine running this cell -- confirm that is the intended reference.
if not data_ref_carga:
    data_ref_carga = datetime.now().strftime("%Y-%m-%d")

# Parse the value instead of just deleting dashes, so a malformed date fails
# here with a clear message rather than producing a bogus URL / file name.
# The compact YYYYMMDD form is also accepted because plain dash removal used
# to let it through unchanged.
data_ref_dt = None
for _date_format in ("%Y-%m-%d", "%Y%m%d"):
    try:
        data_ref_dt = datetime.strptime(data_ref_carga, _date_format)
        break
    except ValueError:
        continue

if data_ref_dt is None:
    raise ValueError(
        f"data_ref_carga deve estar no formato AAAA-MM-DD; recebido: {data_ref_carga!r}"
    )

# Convert to YYYYMMDD. strftime zero-pads month and day, so an input such as
# "2024-1-5" becomes "20240105".
data_ref_fmt = data_ref_dt.strftime("%Y%m%d")

In [0]:
# =====================================
# BUILD SOURCE URL AND DESTINATION PATH
# =====================================
# File name used by both the default source URL and the destination path.
nome_arquivo = f"focos_diario_br_{data_ref_fmt}.csv"

if url_csv:
    # Custom base URL: drop trailing slashes and a trailing ".csv", then
    # append the date and ".csv" directly (no separator is inserted between
    # the base and the date).
    url_csv = url_csv.rstrip("/").removesuffix(".csv")
    url_csv_final = f"{url_csv}{data_ref_fmt}.csv"
else:
    # No URL supplied: use the default daily-file location.
    url_csv_final = (
        "https://dataserver-coids.inpe.br/queimadas/queimadas/focos/csv/diario/Brasil/"
        + nome_arquivo
    )

# Fall back to the default volume directory when no destination was given.
destino_base = destino_base or "/Volumes/datamasters/raw/raw_inpe"

destino_final = f"{destino_base}/{nome_arquivo}"

In [0]:
# =====================================
# CONTROL LOGS
# =====================================
# Echo the resolved parameters, framed by a separator line above and below.
_separador = "====================================="
_linhas_log = (
    _separador,
    f"Data de carga        : {data_ref_carga}",
    f"Data formatada (yyyymmdd): {data_ref_fmt}",
    f"URL de origem        : {url_csv_final}",
    f"Destino final        : {destino_final}",
    _separador,
)

for _linha in _linhas_log:
    print(_linha)

In [0]:
# =====================================
# DOWNLOAD THE CSV FILE
# =====================================
# Fetch the file over HTTP and write the raw bytes to the destination path.
# Any failure (network, non-200 status, filesystem) is re-raised as a
# RuntimeError so the cell stops with a single, consistent error type.
try:
    # 60-second timeout so a stalled request does not wait indefinitely.
    response = requests.get(url_csv_final, timeout=60)
    if response.status_code != 200:
        raise Exception(f"Falha ao baixar arquivo ({response.status_code}): {url_csv_final}")

    # Make sure the destination directory exists before writing.
    os.makedirs(destino_base, exist_ok=True)
    with open(destino_final, "wb") as f:
        f.write(response.content)
    print(f"‚úÖ Arquivo gravado com sucesso: {destino_final}")

except Exception as e:
    # `from e` records the original exception as the explicit cause, so the
    # traceback shows the underlying failure as the direct cause.
    raise RuntimeError(f"‚ùå Erro no download do arquivo INPE: {e}") from e

In [0]:
# =====================================
# READ BACK FOR VERIFICATION
# =====================================
# Load the file that was just written and count its rows as a sanity check.
# The first line is treated as the header and fields are comma-separated.
try:
    df = spark.read.option("header", True).option("delimiter", ",").csv(destino_final)
    total = df.count()
    print(f"üìä Arquivo cont√©m {total} registros.")
except Exception as e:
    # `from e` records the original exception as the explicit cause of the
    # RuntimeError, matching the error handling used for the download step.
    raise RuntimeError(f"‚ùå Erro ao ler o arquivo CSV: {e}") from e