In [0]:
# Global configuration shared by the cells below.
# Storage account and container are interpolated into the abfss:// URIs
# from which the source CSV files are read.
storage_account = "adlsmartdavid2202"
bronze_container = "bronze"
# Catalog and schema form the `<catalog>.<schema>.<table>` names under which
# the Delta tables are saved.
catalog = "smartdavid_catalog"
schema_bronze = "bronze"

In [0]:
# List every entry at the root of the Bronze container together with its size.
# The URI is built from the shared config values (instead of a hard-coded
# literal) so this cell always inspects the same location the read cells use.
bronze_root = f"abfss://{bronze_container}@{storage_account}.dfs.core.windows.net/"
files = dbutils.fs.ls(bronze_root)
for file_info in files:
    print(file_info.name, "→", file_info.size, "bytes")

In [0]:
# Load the UNITED_DATA extract (comma-separated) from the Bronze container.
united_path = f"abfss://{bronze_container}@{storage_account}.dfs.core.windows.net/DATA_24122025.csv"
# The header row supplies the column names; schema inference is disabled.
df_united = spark.read.options(header=True, inferSchema=False, sep=",").csv(united_path)

# Report the size of the dataset and preview the first rows.
united_rows = df_united.count()
united_cols = len(df_united.columns)
print(f"UNITED_DATA — Registros: {united_rows} | Columnas: {united_cols}")
united_preview = df_united.limit(3)
united_preview.display()

In [0]:
# Load the PAGOS_PRICO extract from the Bronze container.
# Unlike the other two files, this one is semicolon-separated.
prico_path = f"abfss://{bronze_container}@{storage_account}.dfs.core.windows.net/pagos_prico2026.csv"
# The header row supplies the column names; schema inference is disabled.
df_pagos_prico = spark.read.options(header=True, inferSchema=False, sep=";").csv(prico_path)

# Report the size of the dataset and preview the first rows.
prico_rows = df_pagos_prico.count()
prico_cols = len(df_pagos_prico.columns)
print(f"PAGOS_PRICO — Registros: {prico_rows} | Columnas: {prico_cols}")
prico_preview = df_pagos_prico.limit(3)
prico_preview.display()

In [0]:
# Load the PAGOS_MEPECO extract (comma-separated) from the Bronze container.
mepeco_path = f"abfss://{bronze_container}@{storage_account}.dfs.core.windows.net/pagos_mepeco2026.csv"
# The header row supplies the column names; schema inference is disabled.
df_pagos_mepeco = spark.read.options(header=True, inferSchema=False, sep=",").csv(mepeco_path)

# Report the size of the dataset and preview the first rows.
mepeco_rows = df_pagos_mepeco.count()
mepeco_cols = len(df_pagos_mepeco.columns)
print(f"PAGOS_MEPECO — Registros: {mepeco_rows} | Columnas: {mepeco_cols}")
mepeco_preview = df_pagos_mepeco.limit(3)
mepeco_preview.display()

In [0]:
# Print the schema of each loaded dataset under its own banner line.
schema_reports = (
    ("=== UNITED_DATA ===", df_united),
    ("=== PAGOS_PRICO ===", df_pagos_prico),
    ("=== PAGOS_MEPECO ===", df_pagos_mepeco),
)
for banner, frame in schema_reports:
    print(banner)
    frame.printSchema()

In [0]:
# Cell 5.5 — Limpiar nombres de columnas (quitar espacios, puntos, caracteres especiales)
import re

def clean_column_names(df):
    """Return ``df`` with column names sanitised for use as table columns.

    Each name has whitespace, dots, brackets, slashes, commas, semicolons and
    ``=`` replaced by ``_``; runs of underscores are collapsed and leading or
    trailing underscores are removed.

    Two situations that would otherwise yield an unusable result are handled:

    * A name made only of special characters would become empty; it is
      replaced by ``col_<position>``.
    * Distinct original names can collapse to the same cleaned name (for
      example ``"a b"`` and ``"a.b"``); later occurrences receive a numeric
      suffix (``_2``, ``_3``, ...). The comparison ignores case.

    Args:
        df: Object exposing ``columns`` (iterable of str) and ``toDF(*names)``,
            such as a Spark DataFrame.

    Returns:
        The result of ``df.toDF`` called with the cleaned names, in the
        original column order.
    """
    seen = set()
    cleaned_names = []
    for position, original in enumerate(df.columns):
        # Replace characters that are awkward or invalid in column names.
        # "=" is included because Delta rejects it together with " ,;{}()\n\t".
        name = re.sub(r'[\s\.\(\)\{\}\[\]/\\,;=]', '_', original)
        # Collapse repeated underscores and drop them from both ends.
        name = re.sub(r'_+', '_', name).strip('_')
        if not name:
            # Nothing usable was left; fall back to a positional placeholder.
            name = f"col_{position}"

        # Make the name unique among the columns emitted so far.
        candidate = name
        suffix = 2
        while candidate.lower() in seen:
            candidate = f"{name}_{suffix}"
            suffix += 1
        seen.add(candidate.lower())
        cleaned_names.append(candidate)
    return df.toDF(*cleaned_names)

# Aplicar limpieza a los 3 datasets
# Run the column-name cleanup over the three datasets, rebinding each name
# to its cleaned DataFrame.
df_united, df_pagos_prico, df_pagos_mepeco = [
    clean_column_names(frame)
    for frame in (df_united, df_pagos_prico, df_pagos_mepeco)
]

# Show the resulting column lists so the cleanup can be checked by eye.
cleaned_reports = (
    ("=== UNITED_DATA columnas limpias ===", df_united),
    ("\n=== PAGOS_PRICO columnas limpias ===", df_pagos_prico),
    ("\n=== PAGOS_MEPECO columnas limpias ===", df_pagos_mepeco),
)
for banner, frame in cleaned_reports:
    print(banner)
    print(frame.columns)

In [0]:
# Guardar DATA
# Persist each cleaned DataFrame as a Delta table in the Bronze schema.
# Every table is overwritten on each run, and its schema is allowed to change.
bronze_targets = [
    (df_united, "unit_data"),            # DATA
    (df_pagos_prico, "pagos_prico"),     # PAGOS_PRICO
    (df_pagos_mepeco, "pagos_mepeco"),   # PAGOS_MEPECO
]
for frame, table_name in bronze_targets:
    (
        frame.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(f"{catalog}.{schema_bronze}.{table_name}")
    )

print("Las 3 tablas Delta guardadas en Bronze exitosamente")

In [0]:
%sql
-- List the tables present in the Bronze schema of the catalog.
SHOW TABLES IN smartdavid_catalog.bronze;

In [0]:
%sql
-- Row count for each of the three Bronze tables, one result row per table.
-- NOTE(review): the first label reads 'united_data' while the table queried is
-- unit_data; confirm which name is intended.
SELECT 'united_data'  AS tabla, COUNT(*) AS registros FROM smartdavid_catalog.bronze.unit_data
UNION ALL
SELECT 'pagos_prico'  AS tabla, COUNT(*) AS registros FROM smartdavid_catalog.bronze.pagos_prico
UNION ALL
SELECT 'pagos_mepeco' AS tabla, COUNT(*) AS registros FROM smartdavid_catalog.bronze.pagos_mepeco;