In [0]:
# 1. Set up widgets so the notebook can be parameterized per run
dbutils.widgets.text("source_path", "/Volumes/workspace/e-commerce_data/csv_files", "1. Source Data Path")
dbutils.widgets.text("target_database", "streaming_db", "2. Target Streaming Database")
dbutils.widgets.text("checkpoint_path", "/Volumes/workspace/e-commerce_data/csv_files/_checkpoints/ecommerce_pipeline", "3. Checkpoint Path")
# Defaults to "true" so existing runs keep their behavior (checkpoint wiped
# before every run). Set to "false" to let the stream resume incrementally.
dbutils.widgets.dropdown("reset_checkpoint", "true", ["true", "false"], "4. Reset Checkpoint")

# 2. Get parameters
source_path = dbutils.widgets.get("source_path")
target_database = dbutils.widgets.get("target_database")
checkpoint_base_path = dbutils.widgets.get("checkpoint_path")
reset_checkpoint = dbutils.widgets.get("reset_checkpoint").strip().lower() == "true"

# Make sure the target database exists before any stream writes into it.
spark.sql(f"CREATE DATABASE IF NOT EXISTS {target_database}")

if reset_checkpoint:
    # WARNING: removing the checkpoint discards the stream's record of what it
    # has already processed, so the next run starts from scratch and any
    # append-only sink fed by this stream receives the same rows again.
    dbutils.fs.rm(checkpoint_base_path, recurse=True)

print(f"Source Path: {source_path}")
print(f"Target Database: {target_database}")
print(f"Checkpoint Path: {checkpoint_base_path}")

# 3. Import Pipeline Functions
import sys
sys.path.append('/Workspace/Repos/3hmedgomaa2001@gmail.com/Ecommerce-Databricks-Solution')

from etl_pipeline.transformers import create_silver_layer
from etl_pipeline.writer import write_delta_table


def process_micro_batch(micro_batch_df, batch_id):
    """
    Enrich one streaming micro-batch and append it to the silver stream table.

    Intended to be passed to ``foreachBatch``. It reuses the existing batch
    transformation (``create_silver_layer``) by handing it the micro-batch as
    the "events" input together with the static properties and categories
    bronze tables.

    Args:
        micro_batch_df: DataFrame containing the rows of the current micro-batch.
        batch_id: Identifier of the micro-batch supplied by the streaming
            engine; used here only for logging.

    Note:
        The write is a plain append. If the engine retries a micro-batch
        (for example after a failure), its rows may be appended more than once.
    """
    print(f"--- Processing Micro-Batch ID: {batch_id} ---")

    # Use the session that owns the micro-batch instead of the notebook-global
    # `spark`, so the callback does not depend on that global being reachable
    # in the context where foreachBatch invokes it.
    session = micro_batch_df.sparkSession

    # Static lookup tables are re-read on every micro-batch so each batch is
    # joined against their current contents.
    properties_df = session.table("default.properties_bronze")
    category_df = session.table("default.categories_bronze")

    bronze_dfs_for_batch = {
        "events": micro_batch_df,
        "properties": properties_df,
        "categories": category_df,
    }

    # 1. Create the Silver layer for this micro-batch
    silver_micro_batch_df = create_silver_layer(bronze_dfs_for_batch)

    # 2. Append the resulting silver data to the streaming target table
    silver_table_name = f"{target_database}.events_enriched_silver_stream"
    silver_micro_batch_df.write.format("delta").mode("append").saveAsTable(silver_table_name)
    print(f"Appended micro-batch to {silver_table_name}")
    

# Read the events CSV incrementally with Auto Loader (the "cloudFiles" source).
# NOTE(review): Databricks documents `cloudFiles.inferColumnTypes` as the
# Auto Loader option for column type inference -- confirm that `inferSchema`
# has the intended effect here before relying on inferred types.
events_stream_df = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load(f"{source_path}/events.csv")
)

# Route every micro-batch through process_micro_batch. The availableNow
# trigger processes the data available at start and then stops the query.
stream_query = (
    events_stream_df.writeStream
    .foreachBatch(process_micro_batch)
    .option("checkpointLocation", checkpoint_base_path)
    .trigger(availableNow=True)
    .start()
)

print("Streaming query started.")

# start() returns immediately; block until the availableNow run has finished
# so that later cells see the written data and any stream failure is raised
# here instead of being lost in the background.
stream_query.awaitTermination()
