In [1]:
from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip

# Session-level settings that switch on Delta Lake: the Delta SQL extension and
# the Delta implementation of the default `spark_catalog`.
delta_conf = {
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
}

# Local session using every available core (`local[*]`) with Hive support enabled.
builder = (
    SparkSession.builder
    .appName("Delta Spark 4.0.0")
    .master("local[*]")
    .enableHiveSupport()
)
for conf_key, conf_value in delta_conf.items():
    builder = builder.config(conf_key, conf_value)

# The builder is passed through the `delta` package helper before the session
# is created (or an already-running one is reused by `getOrCreate`).
spark = configure_spark_with_delta_pip(builder).getOrCreate()

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_timestamp, input_file_name

def read_csv_with_audit_columns(path):
    """Read a CSV file into a DataFrame and append two load-audit columns.

    The file is read with its first line as the header and with schema
    inference enabled, exactly as every source in this notebook is loaded.

    Args:
        path: Location of the CSV file, passed unchanged to ``spark.read.csv``.

    Returns:
        The CSV contents plus two extra columns:
        ``fecha_carga`` (load timestamp, from ``current_timestamp()``) and
        ``archivo_origen`` (source file, from ``input_file_name()``).
    """
    return (
        spark.read.csv(path, header=True, inferSchema=True)
        .withColumn("fecha_carga", current_timestamp())
        .withColumn("archivo_origen", input_file_name())
    )


# One DataFrame per source file; all five share the same read options and
# audit columns, so they go through the single helper above.
df_customer = read_csv_with_audit_columns("../data/Customer.csv")
df_product = read_csv_with_audit_columns("../data/Product.csv")
df_so_detail = read_csv_with_audit_columns("../data/SalesOrderDetail.csv")
df_so_header = read_csv_with_audit_columns("../data/SalesOrderHeader.csv")
df_s_territory = read_csv_with_audit_columns("../data/SalesTerritory.csv")

In [3]:
# Print a preview of each loaded DataFrame using the default `show()` settings,
# in the same order the sources were read.
for preview_df in (df_customer, df_product, df_so_detail, df_so_header, df_s_territory):
    preview_df.show()

+----------+--------+-------+-----------+-------------+--------------------+--------------------+--------------------+--------------------+
|CustomerID|PersonID|StoreID|TerritoryID|AccountNumber|             rowguid|        ModifiedDate|         fecha_carga|      archivo_origen|
+----------+--------+-------+-----------+-------------+--------------------+--------------------+--------------------+--------------------+
|         1|    NULL|    934|          1|   AW00000001|{3F5AE95E-B87D-4A...|2014-09-12 11:15:...|2025-07-30 22:13:...|file:///d:/Curso%...|
|         2|    NULL|   1028|          1|   AW00000002|{E552F657-A9AF-4A...|2014-09-12 11:15:...|2025-07-30 22:13:...|file:///d:/Curso%...|
|         3|    NULL|    642|          4|   AW00000003|{130774B1-DB21-4E...|2014-09-12 11:15:...|2025-07-30 22:13:...|file:///d:/Curso%...|
|         4|    NULL|    932|          4|   AW00000004|{FF862851-1DAA-40...|2014-09-12 11:15:...|2025-07-30 22:13:...|file:///d:/Curso%...|
|         5|    NULL

In [4]:
# Persist every loaded DataFrame as a Delta table under ../sql/dw_bronze/,
# replacing whatever a previous run left there (mode "overwrite").
#
# Fix: the sales-order-header and sales-territory targets used to be
# "so_header.write" and "s_terrotory.write" -- a stray ".write" suffix pasted
# from the DataFrame expression, plus a misspelling of "territory". They now
# follow the same plain-name convention as the other three tables.
# NOTE(review): anything that already reads the old directory names must be
# pointed at "so_header" / "s_territory".
bronze_tables = {
    "customer": df_customer,
    "product": df_product,
    "so_detail": df_so_detail,
    "so_header": df_so_header,
    "s_territory": df_s_territory,
}

for table_name, bronze_df in bronze_tables.items():
    bronze_df.write.format("delta").mode("overwrite").save(f"../sql/dw_bronze/{table_name}")