In [0]:
# Bronze ingestion config
# Directory that is listed for format detection and then loaded as the raw source.
SOURCE_PATH: str = "dbfs:/databricks-datasets/retail-org/sales_orders/"
# Name handed to saveAsTable when the bronze DataFrame is written.
# NOTE(review): presumably <catalog>.<schema>.<table> - confirm against the workspace.
TARGET_TABLE: str = "retail_project.bronze.sales_orders"

In [0]:
# Imports
from pyspark.sql import functions as F

In [0]:
# Detect file format (standardized, Spark-safe)
#
# Lists SOURCE_PATH, derives the single file extension shared by all data
# files, and validates it against an allow-list. Raises ValueError when no
# data files are found, when more than one extension is present, or when the
# extension is not a supported format. On success FILE_FORMAT holds the
# lower-cased extension.

SUPPORTED_FORMATS = {"parquet", "csv", "json", "xml"}

files = dbutils.fs.ls(SOURCE_PATH)

# Keep only real data files:
#   - directories are skipped explicitly; a directory name such as "v1.0/"
#     contains a dot and would otherwise be mistaken for a file,
#   - names starting with "_" or "." are marker / hidden files
#     (e.g. _SUCCESS, .part-0000.json.crc), not data,
#   - names without a dot carry no extension to inspect.
data_files = [
    f.name.lower()
    for f in files
    if not f.isDir()
    and not f.name.startswith(("_", "."))
    and "." in f.name
]

if not data_files:
    raise ValueError(f"No data files found under {SOURCE_PATH}")

# Collect unique file extensions (text after the last dot)
extensions = {name.rsplit(".", 1)[-1] for name in data_files}

# Enforce single-format sources
if len(extensions) != 1:
    raise ValueError(
        f"Mixed or unsupported file types under {SOURCE_PATH}: {extensions}"
    )

# Exactly one element is guaranteed here; unpack it without emptying the set
(FILE_FORMAT,) = extensions

# Allow only known formats
if FILE_FORMAT not in SUPPORTED_FORMATS:
    raise ValueError(
        f"Unsupported file format '{FILE_FORMAT}' under {SOURCE_PATH}"
    )

print("Detected format:", FILE_FORMAT)

Detected format: json


In [0]:
# Read raw source data with the format that the detection cell validated
# (FILE_FORMAT), instead of a hard-coded "json", so the reader can never
# disagree with what was detected.
# NOTE(review): only default reader options are applied; if the source ever
# switches to a format that needs options (e.g. a CSV header row), add them here.
df_raw = spark.read.format(FILE_FORMAT).load(SOURCE_PATH)

# Bronze enrichment (standard): add ingestion audit columns to every row
df_bronze = (
    df_raw
    # timestamp of this ingestion run
    .withColumn("_read_timestamp", F.current_timestamp())
    # path of the file each row was read from
    .withColumn("_source_path", F.col("_metadata.file_path"))
    # size of that source file, as reported by the file metadata column
    .withColumn("_file_size", F.col("_metadata.file_size"))
)

# Preview a bounded sample and the resulting schema
display(df_bronze.limit(100))
df_bronze.printSchema()

clicked_items,customer_id,customer_name,number_of_line_items,order_datetime,order_number,ordered_products,promo_info,_read_timestamp,_source_path,_file_size
"List(List(AVpfPEx61cnluZ0-gyT9, 34), List(AVpfuJ4pilAPnD_xhDyM, 98), List(AVpe6jFBilAPnD_xQxO2, 60), List(AVpfIODe1cnluZ0-eg35, 49))",19476252,"otbda , outside the box digital agency ,",3,1564627663.0,317568014,"List(List(USD, AVpfuJ4pilAPnD_xhDyM, Rony LBT-GPX555 Mini-System with Bluetooth and NFC, 993, null, 3, pcs), List(USD, AVpe6jFBilAPnD_xQxO2, Aeon 71.5 x 130.9 16:9 Fixed Frame Projection Screen with CineWhite Projection Surface, 218, null, 3, pcs), List(USD, AVpfIODe1cnluZ0-eg35, Cyber-shot DSC-WX220 Digital Camera (Black), 448, null, 2, pcs))",List(),2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpfdBS41cnluZ0-lBIj, 88))",4401099,denevi digital imaging,1,1564630035.0,317568015,"List(List(USD, AVpfdBS41cnluZ0-lBIj, Details About Mogitech G920 Xbox Driving Force Racing Wheel For Xbox One And Pc (941000121), 293, null, 4, pcs))",List(),2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpgIu4Q1cnluZ0-xBK-, 13), List(AVpfeG5oilAPnD_xcTsG, 27), List(AVqVGaEBv8e3D1O-ldFu, 64), List(AVpg-Wj61cnluZ0-8sZe, 87), List(AVphTO5W1cnluZ0-Aygg, 52), List(AVpfMVD-ilAPnD_xW6bu, 49))",14939501,rpm optoelectronics,2,1564632430.0,317568016,"List(List(USD, AVphTO5W1cnluZ0-Aygg, Adventura SH 140 II Shoulder Bag (Black), 27, null, 1, pcs), List(USD, AVpfMVD-ilAPnD_xW6bu, Rony - BC-TRX Battery Charger - Black, 31, List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 2), 2, pcs))","List(List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 2))",2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpfWxx7LJeJML437u-H, 52), List(AVpfvr461cnluZ0-qgHR, 54), List(AVpiMIyE1cnluZ0-K0TA, 14), List(AVz5wc0H-jtxr-f30F6_, 84), List(AVpe7vER1cnluZ0-aJu7, 65))",3072597,"non typical, inc.",3,1564635708.0,317568017,"List(List(USD, AVpiMIyE1cnluZ0-K0TA, Elpine - Rear View Camera - Black, 60, null, 1, pcs), List(USD, AVz5wc0H-jtxr-f30F6_, Zamaha - AVENTAGE 7.2-Ch. 4K Ultra HD A/V Home Theater Receiver - Black, 149, null, 9, pcs), List(USD, AVpe7vER1cnluZ0-aJu7, Mogitech Keys-To-Go Ultra-Portable Bluetooth Keyboard for Android and Windows, 618, null, 3, pcs))",List(),2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpfCQslilAPnD_xThwe, 80), List(AVpiMIyE1cnluZ0-K0TA, 50))",2732808,als deli,2,1564635797.0,317568018,"List(List(USD, AVpfCQslilAPnD_xThwe, M80UWH Manual Series Projection Screen (39.6 x 69.6), 84, null, 1, pcs), List(USD, AVpiMIyE1cnluZ0-K0TA, Elpine - Rear View Camera - Black, 149, null, 2, pcs))",List(),2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpfDLA0ilAPnD_xT0sq, 21), List(AVwjdOdov8e3D1O-nnK9, 85), List(AVpfMVD-ilAPnD_xW6bu, 75))",3838589,"MORENO, SALVADOR C",1,1564637502.0,317568019,"List(List(USD, AVpfMVD-ilAPnD_xW6bu, Rony - BC-TRX Battery Charger - Black, 27, List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 3), 3, pcs))","List(List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 3))",2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpfDLA0ilAPnD_xT0sq, 21), List(AVwjdOdov8e3D1O-nnK9, 85), List(AVpfMVD-ilAPnD_xW6bu, 75))",3838589,"MORENO, SALVADOR C",2,1564637509.0,317568019,"List(List(USD, AVpfMVD-ilAPnD_xW6bu, Rony - BC-TRX Battery Charger - Black, 27, List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 3), 3, pcs), List(USD, AVpiJoY0LJeJML43iz5-, SRS-XB2 Portable Bluetooth Wireless Speaker (Red), 63, null, 1, pcs))","List(List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 3))",2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpe9oDr1cnluZ0-a1wt, 34), List(AVpe7vER1cnluZ0-aJu7, 74), List(AVpfMVD-ilAPnD_xW6bu, 48))",7159905,"TURNER ALSTON, DENISE",2,1564637774.0,317568020,"List(List(USD, AVpe7vER1cnluZ0-aJu7, Mogitech Keys-To-Go Ultra-Portable Bluetooth Keyboard for Android and Windows, 60, null, 1, pcs), List(USD, AVpfMVD-ilAPnD_xW6bu, Rony - BC-TRX Battery Charger - Black, 27, List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 2), 2, pcs))","List(List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 2))",2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVphzZkJ1cnluZ0-Gkcq, 28), List(AVpe48Es1cnluZ0-ZHZU, 46), List(AVpge6k2LJeJML43OhAl, 16), List(AVpfCW42ilAPnD_xTj0y, 70))",13728809,"BOYLAN, HENRI M",2,1564644018.0,317568021,"List(List(USD, AVpge6k2LJeJML43OhAl, SENNHEISER HD 558 Over Ear Headphones HD558 - AUTHORIZED DEALER, 287, null, 1, pcs), List(USD, AVpfCW42ilAPnD_xTj0y, CD-C600 5-Disc CD Changer, 102, null, 3, pcs))",List(),2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986
"List(List(AVpfMVD-ilAPnD_xW6bu, 36), List(AVpjedgc1cnluZ0-W4NI, 64))",8513182,guardian sprinklers inc,2,1564644839.0,317568022,"List(List(USD, AVpfMVD-ilAPnD_xW6bu, Rony - BC-TRX Battery Charger - Black, 27, List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 7), 7, pcs), List(USD, AVpjedgc1cnluZ0-W4NI, Rony MEXM100BT 160W RMS Marine CD Receiver with Bluetooth (Black) and SiriusXM Ready, 239, null, 3, pcs))","List(List(0.03, 0, AVpfMVD-ilAPnD_xW6bu, 7))",2025-12-14T21:18:15.217Z,dbfs:/databricks-datasets/retail-org/sales_orders/part-00000-tid-1771549084454148016-e2275afd-a5bb-40ed-b044-1774c0fdab2b-105592-1-c000.json,2558986


root
 |-- clicked_items: array (nullable = true)
 |    |-- element: array (containsNull = true)
 |    |    |-- element: string (containsNull = true)
 |-- customer_id: string (nullable = true)
 |-- customer_name: string (nullable = true)
 |-- number_of_line_items: string (nullable = true)
 |-- order_datetime: string (nullable = true)
 |-- order_number: long (nullable = true)
 |-- ordered_products: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- curr: string (nullable = true)
 |    |    |-- id: string (nullable = true)
 |    |    |-- name: string (nullable = true)
 |    |    |-- price: long (nullable = true)
 |    |    |-- promotion_info: struct (nullable = true)
 |    |    |    |-- promo_disc: double (nullable = true)
 |    |    |    |-- promo_id: long (nullable = true)
 |    |    |    |-- promo_item: string (nullable = true)
 |    |    |    |-- promo_qty: long (nullable = true)
 |    |    |-- qty: long (nullable = true)
 |    |    |-- unit: string

In [0]:
# Persist the enriched DataFrame as the Delta Bronze table.
# Every run is a full refresh: existing rows are replaced, and the table
# schema is replaced too because the Bronze schema is authoritative.
bronze_writer = (
    df_bronze.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_writer.saveAsTable(TARGET_TABLE)

print(f"Wrote Bronze table: {TARGET_TABLE}")

Wrote Bronze table: retail_project.bronze.sales_orders


In [0]:
# Quick validation: show how many rows the Bronze table now holds
row_count_query = f"SELECT COUNT(*) AS row_count FROM {TARGET_TABLE}"
spark.sql(row_count_query).show()

+---------+
|row_count|
+---------+
|     4074|
+---------+

