In [0]:
from pyspark.sql.functions import col, when, count, avg

# Namespace prefix shared by every table this script reads or writes.
schema = "workspace.ad_tables"

# 1. Load the Silver funnel together with the per-user app and profile tables.
# NOTE(review): the original remark on user_profile read "Clean version from
# Silver if you saved it" -- confirm this is the table you intend to read.
df_funnel, df_app, df_profile = (
    spark.read.table(f"{schema}.{table_name}")
    for table_name in ("silver_ad_funnel", "user_app_genre", "user_profile")
)

# 2. Attach user interests and demographics to every funnel row.
# Both lookups are LEFT joins on user_id, so funnel rows whose user has no
# app-genre or profile record are kept, with NULLs in those columns.
# NOTE(review): a left join emits one output row per matching right-hand row.
# If user_app_genre or user_profile can hold several rows per user_id, funnel
# rows will be duplicated here -- confirm both are unique per user.
_gold_columns = [
    # Funnel context (alias "f")
    "f.id_md5",
    "f.user_id",
    "f.timestamp",
    "f.hour_of_day",
    "f.day_of_week",
    "f.device_type",
    "f.network_type",
    "f.ad_position",
    "f.campaign_id",
    "f.campaign_type",
    # App interests (alias "a")
    "a.app_cat",
    "a.primary_genre",
    # User profile (alias "p")
    "p.age_range",
    "p.gender",
    "p.state",
    "p.phone_price_range",
    # Funnel outcomes; is_click is the LABEL (target variable)
    "f.is_impression",
    "f.is_click",
]

_with_genre = df_funnel.alias("f").join(df_app.alias("a"), on="user_id", how="left")
_with_profile = _with_genre.join(df_profile.alias("p"), on="user_id", how="left")
gold_features = _with_profile.select(*_gold_columns)

# 3. Derive the 'is_affinity_match' flag (example heuristic).
# The flag is 1 when the user's primary genre is paired with the campaign type
# listed for it below, and 0 otherwise -- including when either column is
# NULL, because a NULL predicate never satisfies when().
_videos_on_cpm = (col("primary_genre") == "Videos") & (col("campaign_type") == "CPM")
_shopping_on_cpc = (col("primary_genre") == "Shopping") & (col("campaign_type") == "CPC")

gold_features = gold_features.withColumn(
    "is_affinity_match",
    when(_videos_on_cpm | _shopping_on_cpc, 1).otherwise(0),
)

# 4. Persist the feature set as the Gold table, replacing any existing copy.
gold_features.write.saveAsTable(f"{schema}.gold_ml_features", mode="overwrite")

# Reached only after the write above has returned.
print("Gold Layer Table 'gold_ml_features' is ready for ML Training!")