In [0]:
# Notebook parameters: a free-text source path and a dropdown that selects
# which ETL layer this run executes.
dbutils.widgets.text("source_path", "/Volumes/workspace/ecommerce", "Source Path")
dbutils.widgets.dropdown(
    "layer",
    "bronze",
    ["bronze", "silver", "gold"],
    "Layer",
)

# Resolve the current widget values (same order as declared above).
source_path, layer = (
    dbutils.widgets.get(widget_name) for widget_name in ("source_path", "layer")
)

print(f"Running ETL for layer: {layer} using source path: {source_path}")

from pyspark.sql.functions import current_timestamp, col, to_date, sum, count, countDistinct

def _run_bronze(source_path):
    """Ingest the raw 2019-Oct CSV into the Bronze Delta table and sanity-check it."""
    print("Executing Bronze Layer...")

    # Create Bronze volume if it does not exist yet
    spark.sql("CREATE VOLUME IF NOT EXISTS workspace.ecommerce.bronze")

    # Read raw CSV data (header row present, column types inferred)
    raw_df = spark.read.csv(
        f"{source_path}/ecommerce_data/2019-Oct.csv",
        header=True,
        inferSchema=True,
    )

    # Stamp every row with the ingestion time and persist as Delta
    bronze_path = f"{source_path}/bronze/ecommerce_events"
    (
        raw_df.withColumn("ingestion_time", current_timestamp())
        .write.format("delta")
        .mode("overwrite")
        .save(bronze_path)
    )

    # Sanity checks run against the table that was actually written. Checking
    # the lazy pre-write DataFrame would re-scan the CSV for every action and
    # re-evaluate current_timestamp(), so the shown ingestion_time might not
    # match what is stored.
    written_df = spark.read.format("delta").load(bronze_path)
    print("Bronze row count:", written_df.count())
    display(written_df.limit(10))
    written_df.printSchema()


def _run_silver(source_path):
    """Clean and de-duplicate Bronze events into the Silver Delta table and sanity-check it."""
    print("Executing Silver Layer...")

    # Create Silver volume if it does not exist yet
    spark.sql("CREATE VOLUME IF NOT EXISTS workspace.ecommerce.silver")

    # Read Bronze data
    bronze_df = spark.read.format("delta").load(f"{source_path}/bronze/ecommerce_events")

    # Keep rows with a user_id, a known event type, and a price that is
    # either missing or non-negative; then drop exact duplicate rows.
    price_is_valid = (col("price").isNull()) | (col("price") >= 0)
    cleaned_df = (
        bronze_df
        .filter(col("user_id").isNotNull())
        .filter(col("event_type").isin("view", "cart", "purchase"))
        .filter(price_is_valid)
        .dropDuplicates()
    )

    # Persist Silver data
    silver_path = f"{source_path}/silver/ecommerce_events_clean"
    cleaned_df.write.format("delta").mode("overwrite").save(silver_path)

    # Checks run against the persisted Silver table so that each action does
    # not re-execute the filter + de-duplication over the whole Bronze table.
    silver_df = spark.read.format("delta").load(silver_path)
    print("Bronze rows:", bronze_df.count())
    print("Silver rows:", silver_df.count())
    print("Null user_id count:", silver_df.filter(col("user_id").isNull()).count())
    silver_df.groupBy("event_type").count().show()
    print("Negative price count:", silver_df.filter(col("price") < 0).count())
    display(silver_df.limit(10))


def _run_gold(source_path):
    """Aggregate Silver purchase events into daily sales metrics and sanity-check them."""
    print("Executing Gold Layer...")

    # Create Gold volume if it does not exist yet
    spark.sql("CREATE VOLUME IF NOT EXISTS workspace.ecommerce.gold")

    # Read Silver data
    silver_df = spark.read.format("delta").load(f"{source_path}/silver/ecommerce_events_clean")

    # Daily aggregates over purchase events only.
    # NOTE: `sum` and `count` here are the pyspark.sql.functions imports from
    # the top of the file, not the Python builtins.
    daily_df = (
        silver_df
        .filter(col("event_type") == "purchase")
        .withColumn("event_date", to_date("event_time"))
        .groupBy("event_date")
        .agg(
            sum("price").alias("total_revenue"),
            count("*").alias("total_orders"),
            countDistinct("user_id").alias("unique_customers"),
        )
    )

    # Persist Gold data
    gold_path = f"{source_path}/gold/daily_sales_metrics"
    daily_df.write.format("delta").mode("overwrite").save(gold_path)

    # Checks run against the persisted Gold table instead of recomputing the
    # aggregation from Silver for every action.
    gold_df = spark.read.format("delta").load(gold_path)
    display(gold_df.orderBy("event_date").limit(10))
    print("Duplicate dates:", gold_df.count() - gold_df.select("event_date").distinct().count())
    gold_df.select(
        "event_date",
        "total_revenue",
        "total_orders",
        "unique_customers",
    ).summary().show()


# Maps each supported layer name to the helper that executes it.
_LAYER_RUNNERS = {
    "bronze": _run_bronze,
    "silver": _run_silver,
    "gold": _run_gold,
}


def run_layer(layer_name, source_path):
    """Run one layer of the e-commerce ETL pipeline.

    Args:
        layer_name: One of "bronze", "silver" or "gold" (exact, lower-case).
        source_path: Base path under which the raw CSV is read and the
            per-layer Delta tables are written.

    Raises:
        ValueError: If ``layer_name`` is not a supported layer. Raised before
            any Spark work is started.
    """
    runner = _LAYER_RUNNERS.get(layer_name)
    if runner is None:
        raise ValueError(f"Unknown layer: {layer_name}")
    runner(source_path)

# Run the single layer selected through the notebook widgets.
run_layer(layer_name=layer, source_path=source_path)

Running ETL for layer: bronze using source path: /Volumes/workspace/ecommerce
Executing Bronze Layer...
Bronze row count: 42448764


event_time,event_type,product_id,category_id,category_code,brand,price,user_id,user_session,ingestion_time
2019-10-01T00:00:00.000Z,view,44600062,2103807459595387724,,shiseido,35.79,541312140,72d76fde-8bb3-4e00-8c23-a032dfed738c,2026-01-15T13:53:10.137Z
2019-10-01T00:00:00.000Z,view,3900821,2053013552326770905,appliances.environment.water_heater,aqua,33.2,554748717,9333dfbd-b87a-4708-9857-6336556b0fcc,2026-01-15T13:53:10.137Z
2019-10-01T00:00:01.000Z,view,17200506,2053013559792632471,furniture.living_room.sofa,,543.1,519107250,566511c2-e2e3-422b-b695-cf8e6e792ca8,2026-01-15T13:53:10.137Z
2019-10-01T00:00:01.000Z,view,1307067,2053013558920217191,computers.notebook,lenovo,251.74,550050854,7c90fc70-0e80-4590-96f3-13c02c18c713,2026-01-15T13:53:10.137Z
2019-10-01T00:00:04.000Z,view,1004237,2053013555631882655,electronics.smartphone,apple,1081.98,535871217,c6bd7419-2748-4c56-95b4-8cec9ff8b80d,2026-01-15T13:53:10.137Z
2019-10-01T00:00:05.000Z,view,1480613,2053013561092866779,computers.desktop,pulser,908.62,512742880,0d0d91c2-c9c2-4e81-90a5-86594dec0db9,2026-01-15T13:53:10.137Z
2019-10-01T00:00:08.000Z,view,17300353,2053013553853497655,,creed,380.96,555447699,4fe811e9-91de-46da-90c3-bbd87ed3a65d,2026-01-15T13:53:10.137Z
2019-10-01T00:00:08.000Z,view,31500053,2053013558031024687,,luminarc,41.16,550978835,6280d577-25c8-4147-99a7-abc6048498d6,2026-01-15T13:53:10.137Z
2019-10-01T00:00:10.000Z,view,28719074,2053013565480109009,apparel.shoes.keds,baden,102.71,520571932,ac1cd4e5-a3ce-4224-a2d7-ff660a105880,2026-01-15T13:53:10.137Z
2019-10-01T00:00:11.000Z,view,1004545,2053013555631882655,electronics.smartphone,huawei,566.01,537918940,406c46ed-90a4-4787-a43b-59a410c1a5fb,2026-01-15T13:53:10.137Z


root
 |-- event_time: timestamp (nullable = true)
 |-- event_type: string (nullable = true)
 |-- product_id: integer (nullable = true)
 |-- category_id: long (nullable = true)
 |-- category_code: string (nullable = true)
 |-- brand: string (nullable = true)
 |-- price: double (nullable = true)
 |-- user_id: integer (nullable = true)
 |-- user_session: string (nullable = true)
 |-- ingestion_time: timestamp (nullable = false)

