In [0]:
%run ./Containers

In [0]:
# Source configuration: every connection detail is read from the Key Vault-backed
# secret scope, so no account or container name is hardcoded in the notebook.
_KV_SCOPE = "acelera-grupo-5-kv"

SRC_STORAGE_ACCOUNT = dbutils.secrets.get(scope=_KV_SCOPE, key="src-storage-account")
SRC_BALANCE_CONTAINER = dbutils.secrets.get(scope=_KV_SCOPE, key="src-balance-container")
SRC_CNPJ_CONTAINER = dbutils.secrets.get(scope=_KV_SCOPE, key="src-cnpj-container")

# Logical source name -> wasbs:// root URI of the blob container that holds it.
containers_config = {
    source_name: f"wasbs://{container}@{SRC_STORAGE_ACCOUNT}.blob.core.windows.net"
    for source_name, container in {
        "BALANCE": SRC_BALANCE_CONTAINER,
        "CNPJ": SRC_CNPJ_CONTAINER,
    }.items()
}

# Empty placeholder for the instantiated container objects (filled in a later cell).
containers_registry = {}

In [0]:
# Instantiate one AzureContainer per configured source, keyed by the source name.
containers_registry = dict(
    (source_name, AzureContainer(container_uri))
    for source_name, container_uri in containers_config.items()
)

In [0]:
# Build an inventory DataFrame with one row of metadata per file found in the
# registered source containers.

# `F` is not imported anywhere in the visible notebook; import it here so this
# cell does not depend on a name leaked by `%run ./Containers`.
from pyspark.sql import functions as F
from pyspark.sql.types import LongType, StringType, StructField, StructType

# Explicit schema: without it, createDataFrame cannot infer column types when no
# files were found (empty list) and the cell fails. Field order is alphabetical,
# matching the key-sorted order PySpark uses when inferring a schema from dicts.
# NOTE(review): assumes get_files_list() yields entries with str `path`/`name`
# and integer `size`/`modificationTime` (epoch milliseconds) — confirm against
# the AzureContainer class.
_INVENTORY_SCHEMA = StructType([
    StructField("container_name", StringType(), True),
    StructField("modificationTime", LongType(), True),
    StructField("path", StringType(), True),
    StructField("size", LongType(), True),
    StructField("table_name", StringType(), True),
])

# One metadata dict per file, across all containers.
lista_arquivos_processada = []

for nome, obj in containers_registry.items():
    # Fetch the file listing for this container.
    arquivos = obj.get_files_list()

    lista_arquivos_processada.extend(
        {
            "path": arq.path,
            "table_name": arq.name,
            "size": arq.size,
            "modificationTime": arq.modificationTime,
            "container_name": nome,
        }
        for arq in arquivos
    )

    print(f"✅ Metadados coletados de: {nome.upper()}")

# Create the inventory DataFrame and convert modificationTime (divided by 1000,
# i.e. treated as milliseconds) into a proper timestamp column.
df_inventario = (
    spark.createDataFrame(lista_arquivos_processada, schema=_INVENTORY_SCHEMA)
    .withColumn(
        "modificationTime",
        (F.col("modificationTime") / 1000).cast("timestamp"),
    )
)

display(df_inventario)

In [0]:
# Target (lakehouse) connection details, read from the secret scope in the same
# order as before: storage account first, then the bronze container name.
TGT_STORAGE_ACCOUNT, BASE_PATH = (
    dbutils.secrets.get(scope="acelera-grupo-5-kv", key=secret_key)
    for secret_key in ("tgt-storage-account", "tgt-bronze-container")
)

# Root abfss:// URI of the bronze layer.
BRONZE_BASE_PATH = f"abfss://{BASE_PATH}@{TGT_STORAGE_ACCOUNT}.dfs.core.windows.net"

# Location of the ingestion control table inside the bronze layer.
CONTROL_TABLE_PATH = "/".join((BRONZE_BASE_PATH, "metadata/control_table"))

In [0]:
from pyspark.sql.functions import current_timestamp
from delta.tables import DeltaTable

def path_exists(path: str) -> bool:
    """Tell whether `path` exists by trying to list it with ``dbutils.fs.ls``.

    Args:
        path: Storage path to probe.

    Returns:
        True if the listing succeeds, False if it fails with a "not found"
        style error.

    Raises:
        Exception: Any listing failure that does not look like a missing path
            (for example permission or connectivity problems) is re-raised
            instead of being reported as "does not exist".
    """
    # NOTE(review): these markers are based on the messages Databricks/Azure
    # storage are known to emit for missing paths — confirm for this runtime.
    not_found_markers = (
        "filenotfoundexception",
        "pathnotfound",
        "does not exist",
        "no such file or directory",
    )
    try:
        dbutils.fs.ls(path)
    except Exception as exc:
        # Only a genuine "missing path" may be answered with False: callers use
        # that answer to decide whether to (over)write data at this location.
        message = str(exc).lower()
        if any(marker in message for marker in not_found_markers):
            return False
        raise
    return True


# Bootstrap the ingestion control table: create it as an empty Delta table
# partitioned by originator, unless something already exists at its path.
if path_exists(CONTROL_TABLE_PATH):
    print("⚠️ Tabela de controle já existe.")
else:
    # Zero-row DataFrame that only carries the control table's schema.
    control_columns = spark.createDataFrame(
        [],
        "originator STRING, table_name STRING, input_file_name STRING"
    )
    # Adding the column through current_timestamp() gives it a timestamp type.
    df_init = control_columns.withColumn(
        "last_ingestion_timestamp", current_timestamp()
    )

    (
        df_init.write
        .format("delta")
        .mode("overwrite")
        .partitionBy("originator")
        .save(CONTROL_TABLE_PATH)
    )
    print("✅ Tabela de controle inicializada.")
