In [0]:
# Main Pipeline Orchestration Notebook
#
# Drives the ETL run end to end: reads the widget parameters, makes sure the
# target database exists, then writes the bronze, silver and gold tables using
# the helpers imported from the `etl_pipeline` package.
# `dbutils` and `spark` are not defined in this cell; they are expected to be
# provided by the notebook runtime.

import os
import sys

# 1. Setup Widgets for Parameterization
dbutils.widgets.text("source_path", "/Volumes/workspace/e-commerce_data/csv_files", "1. Source Data Path")
dbutils.widgets.text("target_database", "default", "2. Target Database Name")

# 2. Get Parameters
# Strip stray whitespace and fail fast on blank values: an empty database name
# would otherwise surface later as an opaque SQL parse error, and an empty
# source path as a failure somewhere inside the reader.
source_path = dbutils.widgets.get("source_path").strip()
target_database = dbutils.widgets.get("target_database").strip()
if not source_path:
    raise ValueError("Widget 'source_path' must not be empty")
if not target_database:
    raise ValueError("Widget 'target_database' must not be empty")

# Ensure the target database exists
# NOTE(review): target_database is interpolated into the SQL text unescaped.
# Quoting it here would break multi-part names (e.g. catalog.schema), so it is
# left as-is; only run this notebook with trusted widget values.
spark.sql(f"CREATE DATABASE IF NOT EXISTS {target_database}")

print(f"Source Path: {source_path}")
print(f"Target Database: {target_database}")

# 3. Import Pipeline Functions
# This allows the notebook to find our custom modules.
# The imports below use the `etl_pipeline.` package prefix, which resolves only
# when the directory *containing* the package is on sys.path. The package
# directory itself is still added (as before) for backward compatibility.
# The membership check avoids piling up duplicate entries on cell re-runs.
pipeline_package_dir = '/Workspace/Users/3hmedgomaa2001@gmail.com/Ecommerce-Databricks-Solution/etl_pipeline'
project_root = os.path.dirname(pipeline_package_dir)
for search_dir in (pipeline_package_dir, project_root):
    if search_dir not in sys.path:
        sys.path.append(search_dir)

from etl_pipeline.reader import read_source_data
from etl_pipeline.transformers import create_silver_layer, create_gold_layers
from etl_pipeline.writer import write_delta_table

# --- EXECUTE THE PIPELINE ---

# BRONZE LAYER
# read_source_data's result is iterated with .items(), i.e. it is used as a
# mapping of name -> DataFrame (presumably one entry per source file; verify
# against etl_pipeline.reader). Each entry is written as "<name>_bronze".
bronze_dfs = read_source_data(spark, source_path)
for name, df in bronze_dfs.items():
    write_delta_table(df, target_database, f"{name}_bronze")

# SILVER LAYER
# A single DataFrame is built from all bronze inputs and written under a fixed
# table name.
silver_df = create_silver_layer(bronze_dfs)
write_delta_table(silver_df, target_database, "events_enriched_silver")

# GOLD LAYER
# create_gold_layers' result is likewise used as a name -> DataFrame mapping;
# here the key is passed through unchanged as the table name (no suffix).
gold_dfs = create_gold_layers(silver_df)
for name, df in gold_dfs.items():
    write_delta_table(df, target_database, name)

print("\n--- ETL Pipeline Execution Complete ---")