In [0]:
# Databricks Job-Safe Silver Layer Notebook

from pyspark.sql.functions import col, when

print("Starting Silver Feature Engineering...")

# ---------------------------------------------------------------------------
# Step 1 - Load the Bronze table.
# `spark` is not defined in this cell; it is expected to be supplied by the
# notebook/job runtime.
# ---------------------------------------------------------------------------
df_raw = spark.table("ecommerce.mental_health.bronze_social_media_raw")
bronze_row_count = df_raw.count()
print("Bronze rows:", bronze_row_count)

# ---------------------------------------------------------------------------
# Step 2 - Clean: drop fully duplicated rows, then replace nulls in the four
# numeric columns below with 0.
# NOTE(review): a null Sleep_Duration_Hours becomes 0, and 0 < 6, so such rows
# end up flagged as Sleep_Deprived in step 3. Likewise, null GAD-7 / PHQ-9
# scores become 0 and therefore never trip the >= 10 risk threshold. Confirm
# that this imputation is intended.
# ---------------------------------------------------------------------------
null_defaults = {
    "Daily_Screen_Time_Hours": 0,
    "Sleep_Duration_Hours": 0,
    "GAD_7_Score": 0,
    "PHQ_9_Score": 0,
}
df_clean = df_raw.dropDuplicates().na.fill(null_defaults)

clean_row_count = df_clean.count()
print("Cleaned rows:", clean_row_count)

# ---------------------------------------------------------------------------
# Step 3 - Derive the feature columns.
# The (name, expression) pairs are applied one at a time, in order, because
# LateNight_Sleep_Risk reads the Sleep_Deprived column created just before it.
# ---------------------------------------------------------------------------
gad_at_or_above_10 = col("GAD_7_Score") >= 10
phq_at_or_above_10 = col("PHQ_9_Score") >= 10

feature_definitions = [
    # 1 when daily screen time exceeds 6, otherwise 0.
    ("High_Screen_Time",
     when(col("Daily_Screen_Time_Hours") > 6, 1).otherwise(0)),
    # 1 when sleep duration is below 6, otherwise 0.
    ("Sleep_Deprived",
     when(col("Sleep_Duration_Hours") < 6, 1).otherwise(0)),
    # Product of Late_Night_Usage and the Sleep_Deprived flag.
    # Late_Night_Usage is not null-filled above, so a null there stays null
    # here; presumably it is a 0/1 indicator - TODO confirm.
    ("LateNight_Sleep_Risk",
     col("Late_Night_Usage") * col("Sleep_Deprived")),
    # 1 only when Activity_Type is exactly the string "Passive".
    ("Passive_Usage_Risk",
     when(col("Activity_Type") == "Passive", 1).otherwise(0)),
    # Straight copy of Social_Comparison_Trigger under a new name.
    ("Social_Comparison_Risk",
     col("Social_Comparison_Trigger")),
    # 1 when either score is at least 10, otherwise 0.
    ("High_Mental_Health_Risk",
     when(gad_at_or_above_10 | phq_at_or_above_10, 1).otherwise(0)),
]

df_features = df_clean
for feature_name, feature_expr in feature_definitions:
    df_features = df_features.withColumn(feature_name, feature_expr)

print("Feature engineering complete")

# ---------------------------------------------------------------------------
# Step 4 - Persist the result as a Delta table, replacing any existing
# contents of the target table.
# ---------------------------------------------------------------------------
silver_writer = df_features.write.format("delta").mode("overwrite")
silver_writer.saveAsTable("ecommerce.mental_health.silver_user_features")

print("Silver table written successfully!")


Starting Silver Feature Engineering...
Bronze rows: 8000
Cleaned rows: 8000
Feature engineering complete
Silver table written successfully!
