In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, StringType, DoubleType, IntegerType, DateType

# ============================================================
# CONFIGURATION
# ============================================================
# Identifiers the source path and the destination table are derived from.
storage_account = "adlsmardata1307"
CATALOGO = "santig_120781"

# abfss URI of the source CSV (container "raw" of the storage account).
path_ecommerce = (
    f"abfss://raw@{storage_account}.dfs.core.windows.net"
    "/Ecommerce_Sales_Prediction_Dataset.csv"
)

# Fully qualified destination table: <catalog>.bronze.ecommerce_sales
TABLA_DESTINO = f"{CATALOGO}.bronze.ecommerce_sales"

# ============================================================
# EXPLICIT SCHEMA (avoids junk columns)
# ============================================================
# Built from (column name, Spark type) pairs, in declaration order.
# Every field is declared nullable.
# NOTE(review): "Date" is declared as a string rather than DateType --
# presumably to keep the raw value untouched at this stage; confirm before
# relying on it as a date downstream.
ecommerce_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("Date", StringType()),
        ("Product_Category", StringType()),
        ("Price", DoubleType()),
        ("Discount", DoubleType()),
        ("Customer_Segment", StringType()),
        ("Marketing_Spend", DoubleType()),
        ("Units_Sold", IntegerType()),
    )
])

# ============================================================
# CSV READ
# ============================================================
# Reads the source CSV with the explicit schema, adds an audit timestamp
# column, and fails when the file contains no data rows. Any failure in this
# step is re-raised as an Exception whose message is prefixed with the read
# context and which is chained to the original error.
try:
    # NOTE(review): with a user-supplied schema, Spark's CSV reader appears to
    # apply the schema by position without validating header names unless the
    # "enforceSchema" option is set to false -- confirm against the Spark docs
    # if the column order of the source file can change.
    df_ecommerce = (
        spark.read
        .option("header", "True")
        .option("sep", ",")
        .schema(ecommerce_schema)
        .csv(path_ecommerce)
    )

    # Audit column: timestamp of this load.
    df_ecommerce = df_ecommerce.withColumn("_process_date", F.current_timestamp())

    # Count once and reuse the result for both the emptiness check and the
    # log line, instead of running a separate limit(1).count() job first.
    total_registros = df_ecommerce.count()
    if total_registros == 0:
        raise Exception("El archivo CSV esta vacio.")

    print(f"Registros leidos: {total_registros}")
    display(df_ecommerce.limit(5))

except Exception as e:
    # "from e" records the original error as the direct cause, so the
    # traceback shows the underlying Spark/storage failure explicitly.
    raise Exception(f"Error en la lectura del CSV: {e}") from e

# ============================================================
# WRITE TO BRONZE
# ============================================================
# Saves the DataFrame as a Delta table at TABLA_DESTINO, overwriting the
# existing contents and schema. Any failure is re-raised as an Exception
# whose message is prefixed with the write context and which is chained to
# the original error.
try:
    (
        df_ecommerce.write.format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(TABLA_DESTINO)
    )

    print(f"OK: Datos cargados exitosamente en {TABLA_DESTINO}")

except Exception as e:
    # "from e" records the original error as the direct cause, so the
    # traceback shows the underlying write failure explicitly.
    raise Exception(f"Error al escribir en Bronze: {e}") from e

Registros leidos: 1000


Date,Product_Category,Price,Discount,Customer_Segment,Marketing_Spend,Units_Sold,_process_date
01-01-2023,Sports,932.8,35.82,Occasional,6780.38,32,2026-02-21T04:14:08.024885Z
02-01-2023,Toys,569.48,3.6,Premium,6807.56,16,2026-02-21T04:14:08.024885Z
03-01-2023,Home Decor,699.68,3.56,Premium,3793.91,27,2026-02-21T04:14:08.024885Z
04-01-2023,Toys,923.27,0.61,Premium,9422.75,29,2026-02-21T04:14:08.024885Z
05-01-2023,Toys,710.17,47.83,Premium,1756.83,17,2026-02-21T04:14:08.024885Z


OK: Datos cargados exitosamente en santig_120781.bronze.ecommerce_sales
