In [0]:
# --- Notebook parameters (Databricks widgets) ---
# Two free-text widgets name the table to read and the table to write.
dbutils.widgets.text("input_table", "workspace.ecommerce.ecommerce_delta_table")
dbutils.widgets.text("output_table", "workspace.ecommerce.user_features_silver")
# Dropdown limited to the two listed choices; its value is later passed to the writer's .mode().
dbutils.widgets.dropdown("write_mode", "overwrite", ["overwrite", "append"])

# Resolve the current value of every widget in one pass.
input_table, output_table, write_mode = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("input_table", "output_table", "write_mode")
)

# Echo the effective configuration into the cell output, one setting per line.
print(
    f"Input Table: {input_table}",
    f"Output Table: {output_table}",
    f"Write Mode: {write_mode}",
    sep="\n",
)

In [0]:
from pyspark.sql import functions as F

def create_user_features(source_table):
    """Build one row of behavioural features per user from an event table.

    Reads ``source_table`` through the notebook's ``spark`` session, keeps only
    events with a non-null ``user_id`` and a non-null, non-negative ``price``,
    and aggregates the remaining events by ``user_id``.

    Args:
        source_table: Name of the table to read (passed to ``spark.read.table``).

    Returns:
        A DataFrame with the columns ``user_id``, ``total_events``,
        ``total_purchases``, ``total_spent``, ``avg_price``,
        ``unique_products`` and ``last_activity``.
    """
    print("Reading Bronze data...")
    events = spark.read.table(source_table)

    # Discard events that cannot be attributed to a user or carry an unusable price.
    valid_events = events.filter(
        F.col("user_id").isNotNull()
        & F.col("price").isNotNull()
        & (F.col("price") >= 0)
    )

    # Shared predicate so the purchase count and the purchase revenue agree
    # on what counts as a purchase.
    is_purchase = F.col("event_type") == "purchase"

    print("Creating user-level features...")
    features = valid_events.groupBy("user_id").agg(
        F.count("*").alias("total_events"),
        F.sum(F.when(is_purchase, 1).otherwise(0)).alias("total_purchases"),
        # Only purchase events represent money spent. Summing the price of
        # every event would also add the prices of non-purchase events.
        # Without an otherwise() branch, non-purchase rows contribute NULL,
        # which sum() ignores.
        F.sum(F.when(is_purchase, F.col("price"))).alias("total_spent"),
        # Average price over all retained events, not only purchases.
        F.avg("price").alias("avg_price"),
        F.countDistinct("product_id").alias("unique_products"),
        F.max("event_time").alias("last_activity"),
    )

    # Users with no purchase events get a NULL total_spent from the
    # conditional sum above; report it as 0 instead.
    features = features.fillna({
        "total_spent": 0,
        "avg_price": 0,
        "total_purchases": 0,
    })

    return features

In [0]:
def validate_data(df):
    """Check that the feature table holds exactly one row per user.

    Prints the row count and the number of distinct ``user_id`` values, then
    compares the two.

    Args:
        df: DataFrame with a ``user_id`` column.

    Raises:
        ValueError: If the row count differs from the number of distinct
            ``user_id`` values, i.e. at least one user appears more than once.
    """
    total = df.count()
    distinct_users = df.select("user_id").distinct().count()

    print(f"Total records: {total}")
    print(f"Distinct users: {distinct_users}")

    if total != distinct_users:
        # ValueError is still an Exception subclass, so callers catching
        # Exception keep working; the counts make the failure diagnosable.
        raise ValueError(
            f"Duplicate users found! {total - distinct_users} surplus row(s): "
            f"{total} records vs {distinct_users} distinct users."
        )

    print("Data validation passed.")

In [0]:
# Build the per-user feature DataFrame from the table named by the `input_table` widget.
features_df = create_user_features(source_table=input_table)

# Stop the run here if any user appears more than once in the result.
validate_data(df=features_df)

In [0]:
print("Writing Silver table...")

# Save the features as a Delta table named by `output_table`,
# using the save mode chosen in the `write_mode` widget.
(
    features_df.write
    .format("delta")
    .mode(write_mode)
    .saveAsTable(output_table)
)

print("Write completed.")

In [0]:
# Finish the notebook, passing a status string to dbutils.notebook.exit.
completion_message = "User feature pipeline completed successfully."
dbutils.notebook.exit(completion_message)