In [0]:
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
 
# Reuse the active Spark session if one exists; otherwise create one
# registered under the application name "IngestaoCorrelacao".
spark = (
    SparkSession.builder
    .appName("IngestaoCorrelacao")
    .getOrCreate()
)

In [0]:
# Fetch the target storage settings from the "secrets-kv" secret scope.
# The lookups run in the same order as the names on the left-hand side.
TGT_STORAGE_ACCOUNT, TGT_CONTAINER, TGT_SAS_TOKEN = (
    dbutils.secrets.get(scope="secrets-kv", key=secret_key)
    for secret_key in ("tgt-storage-account", "tgt-container", "tgt-sas-token")
)

# Register the SAS token in the Spark configuration for the target
# container/account pair so that wasbs:// paths on it can be accessed.
sas_config_key = f"fs.azure.sas.{TGT_CONTAINER}.{TGT_STORAGE_ACCOUNT}.blob.core.windows.net"
spark.conf.set(sas_config_key, TGT_SAS_TOKEN)

# Base path of the lakehouse: the "bronze_1" folder inside the target container.
LAKEHOUSE_BASE_PATH = f"wasbs://{TGT_CONTAINER}@{TGT_STORAGE_ACCOUNT}.blob.core.windows.net/bronze_1"

In [0]:
import os
from pyspark.sql.functions import current_timestamp

# Source: a CSV file expected in the current working directory.
# Target: the "corr_ncm_cnae" folder under the lakehouse base path.
working_dir = os.getcwd()
source_path = f"file:{working_dir}/corr_ncm_cnae.csv"
target_path = f"{LAKEHOUSE_BASE_PATH}/corr_ncm_cnae"

# Read the local CSV: first row is the header, fields are separated by ";".
# NOTE(review): inferSchema lets Spark guess column types; if any code column
# carries leading zeros it may be typed as a number and lose them — confirm
# against the file contents.
df_bronze = (
    spark.read
    .format("csv")
    .options(header="true", inferSchema="true", sep=";")
    .load(source_path)
)

# Write the data to the lakehouse as a Delta table, replacing existing content.
df_bronze.write.save(target_path, format="delta", mode="overwrite")

# Build a single-row control record describing this load and stamp it with
# the current timestamp.
control_rows = [("corr_ncm_cnae", "corr_ncm_cnae", "corr_ncm_cnae.csv")]
control_columns = ["container_name", "table_name", "input_file_name"]
controle_df = spark.createDataFrame(control_rows, control_columns).select(
    "*",
    current_timestamp().alias("last_ingestion_timestamp"),
)

# Append the control record to the ingestion control table.
controle_df.write.saveAsTable(
    "bronze_1.bronze_control_table",
    format="delta",
    mode="append",
)

print(f"Carga finalizada com sucesso em: {target_path}")