In [0]:
from pyspark.sql import functions as F

# 1. Build one row of ML features per user from the Silver events table.
silver_events = spark.table("ecommerce_prod.silver.cleaned_events")

# Row-level predicates reused by the conditional aggregates below.
is_purchase = F.col("event_type") == "purchase"
# Spark's dayofweek() numbers Sunday as 1 and Saturday as 7.
is_weekend = F.dayofweek("event_date").isin(1, 7)

# Total number of event rows in each user's group.
events_per_user = F.count("*")

feature_aggregates = [
    events_per_user.alias("interaction_count"),
    # Spend: purchase rows contribute their price, every other row adds 0.
    F.sum(F.when(is_purchase, F.col("price")).otherwise(0)).alias("total_spend"),
    # Weekend ratio: when() without otherwise() yields NULL for non-weekend
    # rows, and count() skips NULLs, so the numerator counts weekend events only.
    (F.count(F.when(is_weekend, 1)) / events_per_user).alias("weekend_ratio"),
    # NOTE(review): this averages price over every event row, not just views;
    # confirm that matches what "avg_viewed_price" is meant to capture.
    F.avg("price").alias("avg_viewed_price"),
    # Number of distinct main categories the user interacted with.
    F.countDistinct("main_category").alias("category_diversity"),
]

gold_features_df = silver_events.groupBy("user_id").agg(*feature_aggregates)

# 2. Persist the features as a Delta table. The write replaces the table's
#    existing contents, and overwriteSchema allows the stored schema to be
#    replaced by the new DataFrame's schema as part of that overwrite.
gold_writer = (
    gold_features_df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
gold_writer.saveAsTable("ecommerce_prod.gold.user_ml_features")

# Completion message for the notebook output. The leading character is the
# check-mark emoji (U+2705); it had been stored as mis-decoded UTF-8 bytes.
print("✅ Gold table updated with weekend_ratio!")