In [0]:
# Remove all existing widgets before re-creating them below.
dbutils.widgets.removeAll()

# Text widget holding the path of the source CSV file.
dbutils.widgets.text("source_path",'/Volumes/workspace/ecommerce/orders/Orders.csv',"Source file path")

# Dropdown for choosing which layer to process (defaults to "bronze").
dbutils.widgets.dropdown("layer","bronze",["bronze","silver","gold"],'Select Processing Layer')

# Read the current widget values from the notebook UI.
source_file = dbutils.widgets.get("source_path")
active_layer = dbutils.widgets.get("layer")

print(f"Job Started for Layer: {active_layer}")
print(f"Job Processing Source: {source_file}")


In [0]:
from pyspark.sql import functions as F
from delta.tables import DeltaTable

def bronze(source_path=None):
    """Load the orders CSV, cast its columns, and overwrite the Bronze Delta table.

    Args:
        source_path: Path of the CSV file to ingest. When omitted, the current
            value of the ``source_path`` notebook widget is used, so the file
            chosen in the UI is the one that actually gets processed.
    """
    print("Processing Bronze Layer")
    if source_path is None:
        # Honour the widget instead of a hard-coded path; the widget's default
        # is the original Orders.csv location.
        source_path = dbutils.widgets.get("source_path")
    df = spark.read.format('csv').option('header', 'true').load(source_path)
    # The CSV is read without schema inference, so cast each column explicitly.
    # Values that fail a cast or date parse may become NULL rather than raising.
    df = df.withColumn("order_id", F.col("order_id").cast("int")) \
         .withColumn("customer_id", F.col("customer_id").cast("int")) \
         .withColumn("order_date", F.to_date(F.col("order_date"), "M/d/yyyy")) \
         .withColumn("amount", F.col("amount").cast("decimal(10,2)")) \
         .withColumn("status", F.col("status").cast("string"))\
         .withColumn("load_timestamp", F.current_timestamp())
    # Bronze is fully rebuilt from the source file on every run.
    df.write.mode("overwrite").format("delta").save("/Volumes/workspace/ecommerce/orders/bronze")
    
def silver():
    """Upsert completed, de-duplicated Bronze orders into the Silver Delta table.

    Reads the Bronze table, keeps only rows whose ``status`` is "Completed",
    drops duplicates on the order key columns, and then either merges the
    result into the existing Silver table or creates it on the first run.
    """
    print("Processing Silver Layer")
    bronze_df = spark.read.format('delta').load('/Volumes/workspace/ecommerce/orders/bronze')

    # Columns that identify an order row; shared by de-duplication and MERGE so
    # the two can never drift apart.
    dedup_keys = ["order_id", "customer_id", "order_date", "amount"]
    merge_keys = ["order_id", "customer_id", "amount", "order_date", "status"]

    silver_df = bronze_df.filter(F.col("status") == "Completed").dropDuplicates(dedup_keys)
    silver_path = "/Volumes/workspace/ecommerce/orders/silver"

    if DeltaTable.isDeltaTable(spark, silver_path):
        silver_table = DeltaTable.forPath(spark, silver_path)
        # Use null-safe equality (<=>): with plain "=", a NULL in any key column
        # (e.g. a failed cast or date parse in Bronze) never matches, so the
        # same row would be inserted again on every run.
        merge_condition = " AND ".join(
            f"target.{column} <=> source.{column}" for column in merge_keys
        )
        # Perform MERGE (upsert)
        silver_table.alias("target").merge(
            silver_df.alias("source"),
            merge_condition
        ).whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()
    else:
        # First time: just write the Silver table
        silver_df.write.format("delta").mode("overwrite").save(silver_path)

def gold():
    """Aggregate Silver orders per customer and overwrite the Gold Delta table.

    For each ``customer_id`` the output holds the order count, the summed
    amount, and the average amount.
    """
    print("Processing Gold Layer")
    orders = spark.read.format('delta').load('/Volumes/workspace/ecommerce/orders/silver')

    # Per-customer metrics, declared up front so the aggregation reads as a list.
    customer_metrics = [
        F.count("*").alias("Total_orders"),
        F.sum("amount").alias("Total_revenue"),
        F.avg("amount").alias("avg_revenue"),
    ]
    per_customer = orders.groupBy("customer_id").agg(*customer_metrics)

    # Gold is rebuilt in full from Silver on every run.
    gold_writer = per_customer.write.format("delta").mode("overwrite")
    gold_writer.save("/Volumes/workspace/ecommerce/orders/gold")

def run_layer(layer_name):
    """Run the workflow for the named layer.

    Args:
        layer_name: One of "bronze", "silver" or "gold". Any other value
            prints "Invalid Layer" and runs nothing.
    """
    if layer_name == "bronze":
        bronze()
        return
    if layer_name == "silver":
        silver()
        return
    if layer_name == "gold":
        gold()
        return
    # Nothing matched: report it rather than raising.
    print("Invalid Layer")

# Run only the layer chosen in the "layer" widget (read into active_layer above),
# instead of unconditionally running every layer regardless of the selection.
run_layer(active_layer)
