In [0]:
# =============================================================
# JOB 2 — INPE BRONZE STREAM
# =============================================================
from datetime import datetime

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, current_timestamp, input_file_name

# Resolve DATA_ALVO, the target date (YYYYMMDD) interpolated into the app name
# and into the landing / checkpoint / schema paths further down.
# The default is formatted once so the widget default and the fallback agree.
_DATA_ALVO_PADRAO = datetime.now().strftime("%Y%m%d")
try:
    dbutils.widgets.text("DATA_ALVO", _DATA_ALVO_PADRAO)
    # A cleared widget yields "" (or stray whitespace); using it as-is would
    # collapse every dated path to its parent directory, so treat a blank
    # value as "not provided" and fall back to today's date.
    DATA_ALVO = dbutils.widgets.get("DATA_ALVO").strip() or _DATA_ALVO_PADRAO
except Exception:
    # Deliberate best-effort: any failure while touching the widget API
    # (e.g. `dbutils` not being defined in this runtime) uses today's date.
    DATA_ALVO = _DATA_ALVO_PADRAO

# Obtain the Spark session (reusing an active one if present), naming the
# application after the target date.
_app_name = f"INPE_Bronze_Stream_{DATA_ALVO}"
spark = SparkSession.builder.appName(_app_name).getOrCreate()

# Every per-date location shares the same stream root on the volume.
_STREAM_ROOT = "/Volumes/datamasters/raw/raw_inpe/stream"
LANDING_PATH = f"{_STREAM_ROOT}/inpe_in/{DATA_ALVO}"         # directory loaded by the stream reader
CHECKPOINT_PATH = f"{_STREAM_ROOT}/_checkpoints/{DATA_ALVO}"  # passed as checkpointLocation on write
SCHEMA_PATH = f"{_STREAM_ROOT}/_schemas/{DATA_ALVO}"          # passed as cloudFiles.schemaLocation

# Target table for the stream writer.
# NOTE(review): the schema is spelled `b_inep` while the source is INPE —
# confirm this is the intended schema name and not a transposition.
TABELA_BRONZE = "datamasters.b_inep.focos_queimadas_stream"

print(f"⚡ Iniciando streaming de {LANDING_PATH} → {TABELA_BRONZE}")

# Streaming read of the CSV files under LANDING_PATH using the `cloudFiles`
# source; the inferred schema is tracked at SCHEMA_PATH and the first line of
# each file is treated as a header. Two audit columns are appended:
#   arquivo_origem      - path of the file each row was read from
#   data_processamento  - timestamp at which the row was processed
df_stream = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "csv")
    .option("cloudFiles.schemaLocation", SCHEMA_PATH)
    .option("header", True)
    .load(LANDING_PATH)
    # Use the file-metadata column instead of input_file_name(): the latter is
    # not supported on Unity Catalog standard/shared access mode, and the
    # source here is a Unity Catalog volume path.
    .withColumn("arquivo_origem", col("_metadata.file_path"))
    .withColumn("data_processamento", current_timestamp())
)

# Configure the Delta sink: append-only output, progress tracked at
# CHECKPOINT_PATH, and schema merging enabled on write.
# NOTE(review): no trigger is set and the StreamingQuery returned by toTable()
# is neither kept nor awaited — confirm that an open-ended stream per
# DATA_ALVO folder is the intended behavior for this job.
_bronze_writer = (
    df_stream.writeStream
    .format("delta")
    .outputMode("append")
    .options(checkpointLocation=CHECKPOINT_PATH, mergeSchema="true")
)

# toTable() starts the query, writing into TABELA_BRONZE.
_bronze_writer.toTable(TABELA_BRONZE)