In [0]:
# Fully qualified table names used by this notebook.
# SILVER_TABLE is the input that the later cells read; the two GOLD_* names are
# the business-ready output tables this notebook writes.
SILVER_TABLE = "workspace.default.silver_events"
GOLD_DAILY_TABLE = "workspace.default.gold_daily_funnel"
GOLD_SESSION_TABLE = "workspace.default.gold_session_funnel"


In [0]:
from pyspark.sql import functions as F

# Load the Silver events table and attach `event_date`, the calendar date
# obtained by converting each row's `event_time` with to_date.
df = spark.table(SILVER_TABLE).withColumn(
    "event_date",
    F.to_date(F.col("event_time")),
)


In [0]:
# Per-day funnel metrics: one row per event_date with the number of view, cart
# and purchase events, plus the summed price of that day's purchase events.
df_daily = df.groupBy("event_date").agg(
    # Each event type becomes a 0/1 indicator that is summed into a count.
    *[
        F.sum(F.when(F.col("event_type") == event_type, 1).otherwise(0)).alias(metric)
        for metric, event_type in (
            ("views", "view"),
            ("carts", "cart"),
            ("purchases", "purchase"),
        )
    ],
    # Non-purchase events contribute 0.0, so only purchase prices add up.
    F.sum(
        F.when(F.col("event_type") == "purchase", F.col("price")).otherwise(0.0)
    ).alias("daily_revenue"),
)


In [0]:
def _safe_rate(numerator, denominator):
    """Build the guarded ratio of two columns, given by name.

    The resulting column is numerator / denominator where the denominator is
    greater than zero, and the literal 0.0 everywhere else, so a day without
    any denominator events yields 0.0 instead of dividing by zero.
    """
    return F.when(
        F.col(denominator) > 0,
        F.col(numerator) / F.col(denominator),
    ).otherwise(F.lit(0.0))


# Append the three step-to-step conversion rates to the daily funnel.
df_daily = (
    df_daily
    .withColumn("view_to_cart_rate", _safe_rate("carts", "views"))
    .withColumn("cart_to_purchase_rate", _safe_rate("purchases", "carts"))
    .withColumn("view_to_purchase_rate", _safe_rate("purchases", "views"))
)


In [0]:
# Persist the daily funnel as a Delta table, replacing any previous contents.
df_daily.write.saveAsTable(GOLD_DAILY_TABLE, format="delta", mode="overwrite")

print(f"✅ Gold daily funnel created: {GOLD_DAILY_TABLE}")


In [0]:
# Sanity check: read the saved table back and show its 25 earliest dates.
display(spark.table(GOLD_DAILY_TABLE).sort("event_date").limit(25))


In [0]:
# Session-level funnel: one row per user_session with a 0/1 flag for each
# funnel step the session reached, plus the summed price of its purchases.
# NOTE(review): rows whose user_session is null would all be grouped into a
# single row here -- confirm the Silver table guarantees a non-null session id.
df_session = df.groupBy("user_session").agg(
    # max over a 0/1 indicator is 1 as soon as one matching event exists.
    *[
        F.max(F.when(F.col("event_type") == event_type, 1).otherwise(0)).alias(flag)
        for flag, event_type in (
            ("has_view", "view"),
            ("has_cart", "cart"),
            ("has_purchase", "purchase"),
        )
    ],
    # Non-purchase events contribute 0.0, so only purchase prices add up.
    F.sum(
        F.when(F.col("event_type") == "purchase", F.col("price")).otherwise(0.0)
    ).alias("session_revenue"),
)


In [0]:
# Collapse the per-session flags into a single row of funnel KPIs: session
# counts per funnel step, total revenue, and the conversion rates between steps.
df_session_summary = (
    df_session
    .agg(
        F.count("*").alias("total_sessions"),
        F.sum("has_view").alias("sessions_with_view"),
        F.sum("has_cart").alias("sessions_with_cart"),
        F.sum("has_purchase").alias("sessions_with_purchase"),
        F.sum("session_revenue").alias("total_revenue"),
    )
    .select(
        "*",
        # Guarded division: a rate falls back to 0.0 unless its denominator
        # is greater than zero.
        *[
            F.when(F.col(denominator) > 0, F.col(numerator) / F.col(denominator))
            .otherwise(F.lit(0.0))
            .alias(rate)
            for rate, numerator, denominator in (
                ("view_to_cart_rate", "sessions_with_cart", "sessions_with_view"),
                ("cart_to_purchase_rate", "sessions_with_purchase", "sessions_with_cart"),
                ("view_to_purchase_rate", "sessions_with_purchase", "sessions_with_view"),
            )
        ],
    )
)


In [0]:
# Persist the session funnel summary as a Delta table, replacing any previous
# contents.
df_session_summary.write.saveAsTable(GOLD_SESSION_TABLE, format="delta", mode="overwrite")

print(f"✅ Gold session funnel created: {GOLD_SESSION_TABLE}")
# Read the table back so the displayed result is what was actually stored.
display(spark.table(GOLD_SESSION_TABLE))
