In [0]:
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
 
# Reuse the active Spark session if one exists; otherwise create it under
# this notebook's application name.
spark = (
    SparkSession.builder
    .appName("IngestaoFactBalance")
    .getOrCreate()
)

In [0]:
%run ./intancia_containers

In [0]:
from pyspark.sql.functions import (
    current_date,
    current_timestamp,
    lit,
    input_file_name
)

# Source location that every Auto Loader stream in this notebook reads from,
# looked up under the "BALANCE" key.
# NOTE(review): containers_config is not defined in this notebook —
# presumably it is provided by the `%run ./intancia_containers` cell; confirm.
LANDING_PATH = containers_config["BALANCE"]
# Root under which this notebook writes its bronze outputs, schema-tracking
# folders (_schemas/...) and streaming checkpoints (_checkpoint/...).
# NOTE(review): BRONZE_BASE_PATH is likewise not defined here — presumably it
# comes from the same %run; confirm.
BRONZE_PATH = f"{BRONZE_BASE_PATH}/balancacomercial"

In [0]:
# Bronze ingestion of the landing files named EXP_<4 digits>.csv.
#
# Auto Loader ("cloudFiles") reads the semicolon-separated, ISO-8859-1 encoded
# CSV files (with header), three audit columns are appended, and the rows are
# written in append mode to the Delta location {BRONZE_PATH}/exp.
exp_reader_options = {
    "cloudFiles.format": "csv",
    # Schema tracking folder dedicated to this stream.
    "cloudFiles.schemaLocation": f"{BRONZE_PATH}/_schemas/exp",
    "header": "true",
    "sep": ";",
    "encoding": "ISO-8859-1",
    # Only file names of the form EXP_<4 digits>.csv are picked up.
    "pathGlobFilter": "EXP_[0-9][0-9][0-9][0-9].csv",
    "cloudFiles.includeExistingFiles": "true",
}

exp_landing_df = (
    spark.readStream
    .format("cloudFiles")
    .options(**exp_reader_options)
    .load(LANDING_PATH)
)

# Audit columns: load date, load timestamp and originating file of each row.
exp_audited_df = (
    exp_landing_df
    .withColumn("_ingestion_date", current_date())
    .withColumn("_ingestion_timestamp", current_timestamp())
    .withColumn("_source_file", input_file_name())
)

# NOTE(review): newer Spark/Databricks releases recommend
# trigger(availableNow=True) over trigger(once=True) — confirm the runtime
# version before switching.
exp_query = (
    exp_audited_df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", f"{BRONZE_PATH}/_checkpoint/exp")
    .trigger(once=True)
    .start(f"{BRONZE_PATH}/exp")
)

# Keep the query handle as the cell's final expression so it is still shown
# as the cell output.
exp_query

In [0]:
# Bronze ingestion of the landing files named EXP_<4 digits>_MUN.csv.
#
# Auto Loader ("cloudFiles") reads the semicolon-separated, ISO-8859-1 encoded
# CSV files (with header), three audit columns are appended, and the rows are
# written in append mode to the Delta location {BRONZE_PATH}/exp_mun.
exp_mun_reader_options = {
    "cloudFiles.format": "csv",
    # Schema tracking folder dedicated to this stream.
    "cloudFiles.schemaLocation": f"{BRONZE_PATH}/_schemas/exp_mun",
    "header": "true",
    "sep": ";",
    "encoding": "ISO-8859-1",
    # Only file names of the form EXP_<4 digits>_MUN.csv are picked up.
    "pathGlobFilter": "EXP_[0-9][0-9][0-9][0-9]_MUN.csv",
    "cloudFiles.includeExistingFiles": "true",
}

exp_mun_landing_df = (
    spark.readStream
    .format("cloudFiles")
    .options(**exp_mun_reader_options)
    .load(LANDING_PATH)
)

# Audit columns: load date, load timestamp and originating file of each row.
exp_mun_audited_df = (
    exp_mun_landing_df
    .withColumn("_ingestion_date", current_date())
    .withColumn("_ingestion_timestamp", current_timestamp())
    .withColumn("_source_file", input_file_name())
)

# NOTE(review): newer Spark/Databricks releases recommend
# trigger(availableNow=True) over trigger(once=True) — confirm the runtime
# version before switching.
exp_mun_query = (
    exp_mun_audited_df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", f"{BRONZE_PATH}/_checkpoint/exp_mun")
    .trigger(once=True)
    .start(f"{BRONZE_PATH}/exp_mun")
)

# Keep the query handle as the cell's final expression so it is still shown
# as the cell output.
exp_mun_query

In [0]:
# Bronze ingestion of the landing files named IMP_<4 digits>.csv.
#
# Auto Loader ("cloudFiles") reads the semicolon-separated, ISO-8859-1 encoded
# CSV files (with header), three audit columns are appended, and the rows are
# written in append mode to the Delta location {BRONZE_PATH}/imp.
imp_reader_options = {
    "cloudFiles.format": "csv",
    # Schema tracking folder dedicated to this stream.
    "cloudFiles.schemaLocation": f"{BRONZE_PATH}/_schemas/imp",
    "header": "true",
    "sep": ";",
    "encoding": "ISO-8859-1",
    # Only file names of the form IMP_<4 digits>.csv are picked up.
    "pathGlobFilter": "IMP_[0-9][0-9][0-9][0-9].csv",
    "cloudFiles.includeExistingFiles": "true",
}

imp_landing_df = (
    spark.readStream
    .format("cloudFiles")
    .options(**imp_reader_options)
    .load(LANDING_PATH)
)

# Audit columns: load date, load timestamp and originating file of each row.
imp_audited_df = (
    imp_landing_df
    .withColumn("_ingestion_date", current_date())
    .withColumn("_ingestion_timestamp", current_timestamp())
    .withColumn("_source_file", input_file_name())
)

# NOTE(review): newer Spark/Databricks releases recommend
# trigger(availableNow=True) over trigger(once=True) — confirm the runtime
# version before switching.
imp_query = (
    imp_audited_df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", f"{BRONZE_PATH}/_checkpoint/imp")
    .trigger(once=True)
    .start(f"{BRONZE_PATH}/imp")
)

# Keep the query handle as the cell's final expression so it is still shown
# as the cell output.
imp_query

In [0]:
# Bronze ingestion of the landing files named IMP_<4 digits>_MUN.csv.
#
# Auto Loader ("cloudFiles") reads the semicolon-separated, ISO-8859-1 encoded
# CSV files (with header), three audit columns are appended, and the rows are
# written in append mode to the Delta location {BRONZE_PATH}/imp_mun.
#
# FIX: the file filter previously read "EXP_[0-9][0-9][0-9][0-9]_MUN.csv"
# (copied from the exp_mun cell), so this stream loaded EXP_*_MUN files into
# the imp_mun target and never picked up any IMP_*_MUN file.
# NOTE(review): if this cell already ran with the old filter, the imp_mun
# output, its _schemas/imp_mun folder and its _checkpoint/imp_mun folder may
# hold export data — verify and reset them before re-running.
imp_mun_reader_options = {
    "cloudFiles.format": "csv",
    # Schema tracking folder dedicated to this stream.
    "cloudFiles.schemaLocation": f"{BRONZE_PATH}/_schemas/imp_mun",
    "header": "true",
    "sep": ";",
    "encoding": "ISO-8859-1",
    # Only file names of the form IMP_<4 digits>_MUN.csv are picked up.
    "pathGlobFilter": "IMP_[0-9][0-9][0-9][0-9]_MUN.csv",
    "cloudFiles.includeExistingFiles": "true",
}

imp_mun_landing_df = (
    spark.readStream
    .format("cloudFiles")
    .options(**imp_mun_reader_options)
    .load(LANDING_PATH)
)

# Audit columns: load date, load timestamp and originating file of each row.
imp_mun_audited_df = (
    imp_mun_landing_df
    .withColumn("_ingestion_date", current_date())
    .withColumn("_ingestion_timestamp", current_timestamp())
    .withColumn("_source_file", input_file_name())
)

# NOTE(review): newer Spark/Databricks releases recommend
# trigger(availableNow=True) over trigger(once=True) — confirm the runtime
# version before switching.
imp_mun_query = (
    imp_mun_audited_df.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", f"{BRONZE_PATH}/_checkpoint/imp_mun")
    .trigger(once=True)
    .start(f"{BRONZE_PATH}/imp_mun")
)

# Keep the query handle as the cell's final expression so it is still shown
# as the cell output.
imp_mun_query