In [0]:
# Input for this notebook: the silver-layer customer features table.
SILVER_FEATURES_TABLE = "ecommerce_ai.silver_customer_features"

silver_df = spark.table(SILVER_FEATURES_TABLE)

# Render the loaded rows so the input can be inspected before gold columns are derived.
silver_df.display()


In [0]:
from pyspark.sql.functions import col, when


# Cut-offs applied to the `recency_days` column to separate the churn-risk segments.
HIGH_RISK_RECENCY_DAYS = 120
MEDIUM_RISK_RECENCY_DAYS = 90

# Branches are evaluated top-down, so the stricter threshold has to come first.
# Rows where recency_days is NULL satisfy neither comparison and therefore fall
# through to "Low Risk" -- NOTE(review): confirm that is the intended treatment.
recency = col("recency_days")
churn_risk_segment = (
    when(recency > HIGH_RISK_RECENCY_DAYS, "High Risk")
    .when(recency > MEDIUM_RISK_RECENCY_DAYS, "Medium Risk")
    .otherwise("Low Risk")
)

# Start the gold frame from the silver features plus the derived segment label.
gold_df = silver_df.withColumn("churn_risk_segment", churn_risk_segment)


In [0]:
# Translate each churn-risk segment into the action to take for that customer.
# Any segment value other than "High Risk" / "Medium Risk" gets "Loyalty Reward".
segment = col("churn_risk_segment")
recommended_action = (
    when(segment == "High Risk", "Offer Discount Coupon")
    .when(segment == "Medium Risk", "Send Re-engagement Email")
    .otherwise("Loyalty Reward")
)

# withColumn replaces a column of the same name, so re-running this cell
# recomputes `recommended_action` instead of adding a duplicate column.
gold_df = gold_df.withColumn("recommended_action", recommended_action)


In [0]:
# Columns published to the gold table, in output order: the customer key,
# the feature columns carried over from silver, and the two derived columns.
GOLD_COLUMNS = [
    "CustomerID",
    "recency_days",
    "purchase_frequency",
    "total_spend",
    "avg_order_value",
    "churn_label",
    "churn_risk_segment",
    "recommended_action",
]

final_gold_df = gold_df.select(*GOLD_COLUMNS)

# Preview the projected frame before it is written out.
final_gold_df.display()


In [0]:
# Persist the gold frame as a Delta table. mode("overwrite") replaces the
# existing contents of the table on every run rather than appending to them.
(
    final_gold_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("ecommerce_ai.gold_customer_churn_insights")
)


In [0]:
%sql
-- Number of rows (customers) in each churn-risk segment of the gold table.
SELECT
    insights.churn_risk_segment,
    COUNT(*) AS customers
FROM ecommerce_ai.gold_customer_churn_insights AS insights
GROUP BY insights.churn_risk_segment;


In [0]:
%sql
-- Sum of total_spend per churn-risk segment, reported as revenue_at_risk.
SELECT
    insights.churn_risk_segment,
    SUM(insights.total_spend) AS revenue_at_risk
FROM ecommerce_ai.gold_customer_churn_insights AS insights
GROUP BY insights.churn_risk_segment;


In [0]:
%sql
-- Top 20 "High Risk" customers, highest total_spend first.
-- CustomerID is a secondary sort key so that rows tied on total_spend come back
-- in a stable order; without it, which tied rows make the LIMIT 20 cut-off can
-- differ between runs.
SELECT *
FROM ecommerce_ai.gold_customer_churn_insights
WHERE churn_risk_segment = 'High Risk'
ORDER BY total_spend DESC, CustomerID ASC
LIMIT 20;
