In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql import types as T
from pyspark.sql.window import Window
from functools import reduce
import re

In [0]:
# Base directory of the raw CSV files; the load cell appends glob patterns
# directly to this string, so it must keep its trailing slash.
caminho_volume = "/Volumes/catalogo_energia/bronze/dados_brutos_csv/"

In [0]:
def load_csv(path, sep=",", header=True, inferSchema=True):
    """Read one or more CSV files into a Spark DataFrame.

    Relies on the notebook-global ``spark`` session.

    Args:
        path: File path or glob pattern passed to the CSV reader.
        sep: Field delimiter.
        header: Passed as the reader's ``header`` option.
        inferSchema: Passed as the reader's ``inferSchema`` option.

    Returns:
        The DataFrame produced by ``spark.read.csv`` for ``path``.
    """
    reader = spark.read.options(header=header, sep=sep, inferSchema=inferSchema)
    return reader.csv(path)

In [0]:
def normalize_columns(df):
    """Return ``df`` with every column renamed to a lowercase ASCII snake_case name.

    Each name is lowercased, the accented letters listed below are folded to
    their base letter, every run of characters outside ``[a-z0-9]`` becomes a
    single ``_``, and leading/trailing ``_`` are stripped.

    Unlike a chain of ``withColumnRenamed`` calls, the result is guaranteed to
    have unique, non-empty names:

    * a name that normalizes to the empty string (e.g. ``"###"``) becomes
      ``col_<position>`` (1-based position in ``df.columns``);
    * a name that collides with an earlier normalized name gets a numeric
      suffix (``_2``, ``_3``, ...).

    Args:
        df: DataFrame-like object exposing ``columns`` and ``toDF(*names)``.

    Returns:
        A new DataFrame with the same columns in the same order, renamed.
    """
    # Accented lowercase letters folded to ASCII (input is lowercased first,
    # so uppercase variants are covered as well).
    accent_groups = {
        "áàãâä": "a",
        "éèêë": "e",
        "íìîï": "i",
        "óòõôö": "o",
        "úùûü": "u",
        "ç": "c",
    }
    accent_table = str.maketrans(
        {char: repl for chars, repl in accent_groups.items() for char in chars}
    )

    new_names = []
    taken = set()
    for position, col in enumerate(df.columns, start=1):
        clean = str(col).lower().translate(accent_table)

        # Anything that is not a letter or digit collapses into one "_";
        # this also merges consecutive underscores.
        clean = re.sub(r"[^a-z0-9]+", "_", clean).strip("_")

        # A header made only of symbols/whitespace would end up empty.
        if not clean:
            clean = f"col_{position}"

        # Different raw headers can normalize to the same identifier;
        # suffix later occurrences so the output schema stays unambiguous.
        candidate = clean
        suffix = 2
        while candidate in taken:
            candidate = f"{clean}_{suffix}"
            suffix += 1

        taken.add(candidate)
        new_names.append(candidate)

    # Positional rename: immune to name clashes between old and new names.
    return df.toDF(*new_names)

In [0]:
# Load the consumption DataFrames: one per file family in the raw-CSV volume.
df_geral = load_csv(
    caminho_volume + "consumo_energia_20*.csv",
    header=True,
    inferSchema=True,
)
df_limpa = load_csv(
    caminho_volume + "consumo_energia_limpa*.csv",
    header=True,
    inferSchema=True,
)



In [0]:
# Technical clean-up followed by traceability columns:
#   1. normalize the column names;
#   2. nome_arquivo_origem <- source file path from the `_metadata` column;
#   3. data_carga          <- timestamp of this load.
df_geral = (
    normalize_columns(df_geral)
    .withColumn("nome_arquivo_origem", F.col("_metadata.file_path"))
    .withColumn("data_carga", F.current_timestamp())
)

df_limpa = (
    normalize_columns(df_limpa)
    .withColumn("nome_arquivo_origem", F.col("_metadata.file_path"))
    .withColumn("data_carga", F.current_timestamp())
)

In [0]:
# Write the outputs that the Silver layer consumes. Each run fully replaces
# the table contents and schema (overwrite + overwriteSchema).
(
    df_geral.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("catalogo_energia.bronze.consumo_geral")
)
(
    df_limpa.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("catalogo_energia.bronze.energia_limpa")
)