In [0]:
# Read raw ingestion tables (already populated via data ingestion)
raw_sales_df = spark.read.table("workspace.default.sales_transactions")
raw_product_df = spark.read.table("workspace.default.product_master")
raw_store_df = spark.read.table("workspace.default.store_region")


In [0]:
from pyspark.sql.functions import current_timestamp, lit, to_date


In [0]:
bronze_sales_df = (
    raw_sales_df
    # Add ingestion metadata for audit and lineage
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_system", lit("raw_ingestion"))
    .withColumn("ingestion_date", to_date(current_timestamp()))
)


In [0]:
bronze_product_df = (
    raw_product_df
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_system", lit("raw_ingestion"))
    .withColumn("ingestion_date", to_date(current_timestamp()))
)


In [0]:
bronze_store_df = (
    raw_store_df
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_system", lit("raw_ingestion"))
    .withColumn("ingestion_date", to_date(current_timestamp()))
)


In [0]:
bronze_sales_df.write.format("delta") \
    .mode("append") \
    .partitionBy("ingestion_date") \
    .saveAsTable("bronze_sales_transactions")


In [0]:
bronze_product_df.write.format("delta") \
    .mode("append") \
    .partitionBy("ingestion_date") \
    .saveAsTable("bronze_product_master")


In [0]:
bronze_store_df.write.format("delta") \
    .mode("append") \
    .partitionBy("ingestion_date") \
    .saveAsTable("bronze_store_region")


In [0]:
%sql
-- Validation
SELECT COUNT(*) FROM workspace.default.sales_transactions;
SELECT COUNT(*) FROM bronze_sales_transactions;


COUNT(*)
1000


In [0]:
%sql
DESCRIBE DETAIL bronze_sales_transactions;


format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,b3e9de17-5619-4919-a2cc-f8f64e1b26bd,workspace.default.bronze_sales_transactions,,,2025-12-23T00:07:41.361Z,2025-12-23T00:07:50.000Z,List(ingestion_date),List(),1,15078,"Map(delta.parquet.compression.codec -> zstd, delta.enableDeletionVectors -> true)",3,7,"List(appendOnly, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False
