In [0]:
from pyspark.sql.functions import lit


In [0]:
# --- Storage locations -------------------------------------------------------
# Landing container that is listed for raw files and used as the stream's
# load path.
RAW_BASE = "abfss://balancacomercial@landingbeca2026jan.dfs.core.windows.net/"
# Bronze root; the per-file "<table>_chk" folders (schema, checkpoint and
# bad-record locations) are created underneath it.
BRONZE_BASE = "abfss://bronze@storagedatanexus.dfs.core.windows.net/autoloader/landingbeca2026jan/balancacomercial/"

# --- CSV reader options (forwarded verbatim as string options) ---------------
ENC = "latin1"  # value of the reader's "encoding" option
SEP = ";"  # value of the reader's "sep" (field delimiter) option
MULTILINE = "true"  # value of the reader's "multiLine" option
ESCAPE_CHAR = "\\"  # a single backslash; value of the reader's "escape" option
MAX_FILES = "1000"  # value of "cloudFiles.maxFilesPerTrigger"


In [0]:
# List the root of the landing container and keep only plain files;
# directory entries are dropped.
arquivos = list(
    filter(
        lambda entry: not entry.isDir(),
        dbutils.fs.ls(RAW_BASE),
    )
)

In [0]:
# Ingest each CSV file found in the landing container into its own bronze
# table. One Auto Loader stream is started per file (restricted to that file
# through pathGlobFilter), run with the availableNow trigger, and awaited
# before moving on to the next file.

CSV_SUFFIX = ".csv"

# Loop-invariant: the bronze root only needs to be ensured once.
dbutils.fs.mkdirs(BRONZE_BASE)

for arq in arquivos:

    fileName = arq.name

    # The reader below is hard-coded to CSV and the table name is derived from
    # the file name, so anything that is not "<name>.csv" is skipped instead of
    # being parsed as CSV and written under a name that still carries a dot.
    if len(fileName) <= len(CSV_SUFFIX) or not fileName.lower().endswith(CSV_SUFFIX):
        print(f"\n=== IGNORADO (não é .csv): {fileName} ===")
        continue

    # Strip only the trailing extension (case-insensitively). str.replace()
    # would also remove ".csv" occurring in the middle of the name.
    tableName   = fileName[: -len(CSV_SUFFIX)]
    tableFqn    = f"bronze_balancacomercial.{tableName}"
    rawFilePath = RAW_BASE + fileName
    # NOTE(review): bronzePath is not used by the write below (toTable targets
    # the table by name); kept in case later cells reference it.
    bronzePath  = BRONZE_BASE + tableName + "_delta"
    chkBase     = BRONZE_BASE + tableName + "_chk"

    dbutils.fs.mkdirs(chkBase)

    print(f"\n=== PROCESSANDO: {fileName} ===")
    print(f"RAW: {rawFilePath}")
    print(f"BRONZE (tabela): {tableFqn}\n")

    # Auto Loader CSV stream over the landing root, narrowed to this one file.
    # Schema tracking and bad records are stored under the file's own
    # "<table>_chk" folder.
    reader = (
        spark.readStream
            .format("cloudFiles")
            .option("cloudFiles.format", "csv")
            .option("cloudFiles.schemaLocation", f"{chkBase}/_schema")
            .option("cloudFiles.inferColumnTypes", "true")
            .option("cloudFiles.includeExistingFiles", "true")
            .option("cloudFiles.maxFilesPerTrigger", MAX_FILES)
            .option("pathGlobFilter", fileName)
            .option("header", "true")
            .option("sep", SEP)
            .option("multiLine", MULTILINE)
            .option("escape", ESCAPE_CHAR)
            .option("encoding", ENC)
            .option("badRecordsPath", f"{chkBase}/_bad")
            .option("rescuedDataColumn", "_rescued")
            .load(RAW_BASE)
    )

    # Tag every row with the file it came from (constant per stream, since the
    # stream is filtered down to this single file name).
    dfWithSource = reader.withColumn("SOURCE_FILE", lit(fileName))

    query = (
        dfWithSource.writeStream
            .format("delta")
            .option("checkpointLocation", f"{chkBase}/_checkpoint")
            .option("mergeSchema", "true")
            .outputMode("append")
            .trigger(availableNow=True)
            .toTable(tableFqn)
    )

    # Block until the stream has terminated before reading the table back.
    query.awaitTermination()

    # NOTE: this counts every row currently in the table, not only the rows
    # appended by this run.
    dfBronze = spark.table(tableFqn)
    print(f"Linhas ingeridas para {tableName}: {dfBronze.count()}")

print("\n=== INGESTÃO RAW → BRONZE FINALIZADA ===")