In [0]:
# Settings for this Bronze ingestion run.

# Destination: the table name handed to saveAsTable when the data is written.
TARGET_TABLE = "retail_project.bronze.suppliers"

# Origin: the DBFS directory whose files are listed and then loaded.
SOURCE_PATH = "dbfs:/databricks-datasets/retail-org/suppliers/"

In [0]:
# Imports
from pyspark.sql import functions as F

In [0]:
# Detect the source file format from the extensions of the entries that
# dbutils.fs.ls returns for SOURCE_PATH, and expose it as FILE_FORMAT.
#
# Raises ValueError when no data files are present, when the data files do
# not all share one extension, or when that extension is not a supported format.

# Formats this notebook is willing to ingest
SUPPORTED_FORMATS = {"parquet", "csv", "json", "xml"}

files = dbutils.fs.ls(SOURCE_PATH)

# Keep only real data files. Entries that are skipped:
#   * directories -- dbutils.fs.ls reports them with a trailing "/", so a name
#     such as "v1.0/" would otherwise pass the "." test and produce the bogus
#     extension "0/";
#   * Spark/Hadoop side files, whose names start with "_" (e.g. _SUCCESS) or
#     with "." (e.g. .part-00000.crc checksum files);
#   * files that have no extension at all.
data_files = [
    f.name.lower()
    for f in files
    if not f.isDir()
    and not f.name.startswith(("_", "."))
    and "." in f.name
]

if not data_files:
    raise ValueError(f"No data files found under {SOURCE_PATH}")

# Collect unique file extensions (text after the last "." of each name)
extensions = {name.rsplit(".", 1)[-1] for name in data_files}

# Enforce single-format sources
if len(extensions) != 1:
    raise ValueError(
        f"Mixed or unsupported file types under {SOURCE_PATH}: {extensions}"
    )

# Exactly one element is present at this point; read it without emptying the set
FILE_FORMAT = next(iter(extensions))

# Allow only known formats
if FILE_FORMAT not in SUPPORTED_FORMATS:
    raise ValueError(
        f"Unsupported file format '{FILE_FORMAT}' under {SOURCE_PATH}"
    )

print("Detected format:", FILE_FORMAT)

Detected format: csv


In [0]:
# Read raw CSV data and add the standard Bronze audit columns.
#
# The reader options below are CSV-specific. The notebook detects FILE_FORMAT
# from the files under SOURCE_PATH, so fail fast when that detection disagrees
# with this cell instead of silently parsing another format as CSV.
if FILE_FORMAT != "csv":
    raise ValueError(
        f"CSV reader cannot load detected format '{FILE_FORMAT}' under {SOURCE_PATH}"
    )

reader = (
    spark.read
         .format("csv")
         .option("header", "true")        # first line holds the column names
         .option("inferSchema", "true")   # derive column types from the data
         .option("mode", "PERMISSIVE")    # keep malformed rows instead of failing the read
)

df_raw = reader.load(SOURCE_PATH)

# Bronze enrichment (standard): load time plus per-file lineage taken from
# the hidden _metadata column of the file source.
df_bronze = (
    df_raw
    .withColumn("_read_timestamp", F.current_timestamp())
    .withColumn("_source_path", F.col("_metadata.file_path"))
    .withColumn("_file_size", F.col("_metadata.file_size"))
)

# Preview a small sample and the resulting schema
display(df_bronze.limit(10))
df_bronze.printSchema()

SUPPLIER_ID,TAX_ID,supplier_name,state,city,postcode,street,number,unit,region,district,lon,lat,items_provided,_read_timestamp,_source_path,_file_size
4186221,624914924,Ankyo,MA,IPSWICH,1938.0,HAYWARD STREET,21.0,13.0,MA,ESSEX,-70.84378829999999,42.6721925,"Ankyo - TX 7.2-Ch. Network-Ready A/V Home Theater Receiver - Black,Ankyo TXNR757 7.2 Channel Wireless A/V Receiver w/ HDCP2.2/HDR DTS _ Bluetooth,SKW-204 10 230W Powered Subwoofer,Ankyo TX-NR555 7.2-Channel Network A/V Receiver",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
14826641,192206348,Apson,CA,Huntington Park,90255.0,Middleton Street,6124.0,,,,-118.2286537,33.985047200000004,"Apson EX5250 Pro Wireless Business Projector,Apson PowerLite 740HD LCD Projector - 720p - HDTV - 16:10 V11H764020,Apson - Home Cinema 2045 LCD Projector - White",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
23365430,516306860,Conan,IL,WOODRIDGE,60517.0,WOODWARD AVE,7776.0,,IL,,-88.03135959999999,41.7452905,PowerShot G5 X Digital Camera Free Accessory Kit,2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
2341585,421083909,Elpine,MN,Aitkin,56431.0,Stark Lake Rd,27279.0,,MN,,-93.8265176,46.6441497,"Elpine PDXM12 1200W Mono RMS Digital Amplifier,Elpine - Rear View Camera - Black,Elpine - 6-1/2 2-Way Component Car Speakers with Poly-Mica Cones (Pair) - Black""",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
7410873,285503154,Karsair,CA,King City,93930.0,HAVEN DRIVE,415.0,,,,-121.1212356,36.2108813,"Karsair CMY32GX3M4A1600C9 Vengeance Pro 32GB (4x8GB) DDR3 1600 MHz (PC3 12800) Desktop 1.5V,Karsair - VENGEANCE LED Series 16GB (2PK 8GB) 3.0GHz DDR4 Desktop Memory with LED Lighting - Black,Karsair - VENGEANCE Series 16GB (2PK 8GB) 2.4GHz DDR4 Laptop Memory - Black,Karsair - AX760 760-Watt ATX Power Supply - Black,CORSAIR HYDRO SERIES H100i v2 AIO Liquid CPU Cooler",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
57100396,691389968,Mannheiser,NC,CHARLOTTE,28205.0,BYRNES ST,3520.0,,,MECKLENBURG,-80.79093809999999,35.242787799999995,"Mannheiser - Digital Headphone Amplifier - Silver,Mannheiser - RS 175 Over-the-Ear Wireless Headphone System - Black,Mannheiser - Earbud Headphones - Black,SENNHEISER HD 558 Over Ear Headphones HD558 - AUTHORIZED DEALER",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
50683866,395142800,Mogitech,TX,DENTON,76201.0,STELLA ST,,,TX,,-97.1559366,33.213522,"Details About Mogitech G920 Xbox Driving Force Racing Wheel For Xbox One And Pc (941000121),Mogitech - Harmony 950 Universal Remote - Black,Mogitech Keys-To-Go Ultra-Portable Bluetooth Keyboard for Android and Windows,Mogitech Ultrathin Touch Mouse T630 for Windows 8 Touch Gestures",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
4407831,217197998,Mowepro,MA,GLOUCESTER,1930.0,ADAMS HILL ROAD,47.0,,MA,ESSEX,-70.679673,42.6571917,"Flipside 300 Backpack (Black),Adventura SH 140 II Shoulder Bag (Black),SF Slim Lens Pouch 75 AW,Mowepro - Slingshot Edge 250 AW Camera Backpack - Black",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
15865247,494797570,Olitscreens,CA,ANAHEIM,92807.0,E SHORECREST DR,7083.0,,,,-117.74693159999998,33.83939960000001,"R120WH2 ezFrame 2 58.7 x 104.7 Fixed Frame Projection Screen,T92UWH Portable Tripod Screen (45x80),Aeon 71.5 x 130.9 16:9 Fixed Frame Projection Screen with CineWhite Projection Surface,ELECTRIC100H Spectrum Motorized Projection Screen (49 x 87,110V,60Hz),ER120WH2 SableFrame 2 58.8 x 104.6 Fixed Frame Projection Screen,M80UWH Manual Series Projection Screen (39.6 x 69.6)",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555
10372100,716765936,Opple,NJ,CAMDEN,8103.0,8th Street,908.0,,,,-75.11322,39.9352679,"15.4 NakBook Pro with Touch Bar (Late 2016,Space Gray),Opple MD825AM/A Lightning to VGA Adapter for iPhones,Opple NakBook Pro with Touch Bar - 13.3 - Core i5 - 8 GB RAM - 512 GB SSD - English""",2025-12-13T15:41:40.230Z,dbfs:/databricks-datasets/retail-org/suppliers/suppliers.csv,7555


root
 |-- SUPPLIER_ID: integer (nullable = true)
 |-- TAX_ID: integer (nullable = true)
 |-- supplier_name: string (nullable = true)
 |-- state: string (nullable = true)
 |-- city: string (nullable = true)
 |-- postcode: double (nullable = true)
 |-- street: string (nullable = true)
 |-- number: string (nullable = true)
 |-- unit: string (nullable = true)
 |-- region: string (nullable = true)
 |-- district: string (nullable = true)
 |-- lon: double (nullable = true)
 |-- lat: double (nullable = true)
 |-- items_provided: string (nullable = true)
 |-- _read_timestamp: timestamp (nullable = false)
 |-- _source_path: string (nullable = false)
 |-- _file_size: long (nullable = false)



In [0]:
# Persist the Bronze DataFrame to TARGET_TABLE in Delta format.
# Each run is a full refresh: existing rows are replaced, and the table schema
# is replaced as well so it always follows what was just read.
bronze_writer = (
    df_bronze.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
bronze_writer.saveAsTable(TARGET_TABLE)

print(f"Wrote Bronze table: {TARGET_TABLE}")

Wrote Bronze table: retail_project.bronze.suppliers


In [0]:
# Sanity check: count the rows now stored in the Bronze table and print the result.
row_count_df = spark.sql(f"SELECT COUNT(*) AS row_count FROM {TARGET_TABLE}")
row_count_df.show()

+---------+
|row_count|
+---------+
|       15|
+---------+

