In [0]:
import datetime

# Expose the run date as a notebook widget so each run can be parameterised,
# then read it back with surrounding whitespace removed.
dbutils.widgets.text("run_date", "2019-10-01", "Run Date (YYYY-MM-DD)")
current_run_date = dbutils.widgets.get("run_date").strip()

# Strict validation: on Python 3.11+ `date.fromisoformat` also accepts other
# ISO 8601 spellings (e.g. "20191001", "2019-W40-2"). The raw string is later
# embedded in a Spark filter and in the Delta `replaceWhere` predicate, so
# only the canonical YYYY-MM-DD form is allowed through: the value must
# round-trip unchanged through parse -> isoformat().
try:
    if datetime.date.fromisoformat(current_run_date).isoformat() != current_run_date:
        raise ValueError("date is not in canonical YYYY-MM-DD form")
except ValueError as exc:
    # Chain explicitly so the underlying parse error stays visible.
    raise ValueError(
        f"Invalid date format '{current_run_date}'. Expected YYYY-MM-DD."
    ) from exc

print(f"Valid Date Format.{current_run_date}")

In [0]:
from pyspark.sql.functions import col, count, sum as _sum, when, lit
from pyspark.sql import DataFrame

def create_daily_features(bronze_table: str, target_date: str) -> DataFrame:
    """
    Build per-user daily features from the bronze event table.

    Reads ``bronze_table``, keeps the rows whose ``event_time`` (cast to a
    date) equals ``target_date`` and whose ``user_id`` is not null, and
    aggregates them per ``user_id``.

    Args:
        bronze_table: Name of the table to read via ``spark.table``.
        target_date: Day to process as an ISO date string (e.g. "2019-10-01").
            ``datetime.date`` / ``datetime.datetime`` values are accepted too;
            a datetime is reduced to its calendar date.

    Returns:
        DataFrame with one row per ``user_id`` and the columns
        ``daily_interactions`` (number of rows for that user),
        ``daily_purchases`` (number of rows with ``event_type == "purchase"``)
        and ``run_date`` (the target day as a date column).

    Raises:
        ValueError: If ``target_date`` is a string that is not a valid ISO date.
    """
    # Resolve the target day once, up front. A malformed string would
    # otherwise make the date comparison and the run_date cast evaluate to
    # NULL, silently yielding an empty result instead of an error.
    if isinstance(target_date, datetime.datetime):
        target_day = target_date.date()
    elif isinstance(target_date, datetime.date):
        target_day = target_date
    else:
        try:
            target_day = datetime.date.fromisoformat(str(target_date).strip())
        except ValueError as exc:
            raise ValueError(
                f"Invalid target_date '{target_date}'. Expected YYYY-MM-DD."
            ) from exc

    print(f"Aggregating features for {target_date}...")

    # One typed date literal shared by the filter and the run_date column, so
    # both are guaranteed to refer to exactly the same day.
    target_day_col = lit(target_day.isoformat()).cast("date")

    df_bronze = spark.table(bronze_table)

    df_filtered = (
        df_bronze
        .filter(col("event_time").cast("date") == target_day_col)
        .filter(col("user_id").isNotNull())
    )

    # Feature Engineering
    df_features = (
        df_filtered
        .groupBy("user_id")
        .agg(
            count("*").alias("daily_interactions"),
            # Conditional sum: each purchase row contributes 1, everything else 0.
            _sum(when(col("event_type") == "purchase", 1).otherwise(0)).alias("daily_purchases")
        )
        .withColumn("run_date", target_day_col)
    )

    return df_features

In [0]:
# Source (bronze) and destination (silver) tables for this run.
bronze_table = "workspace.ecommerce.ecommerce_delta"
silver_table = "workspace.ecommerce.daily_user_features_silver"

# Build the per-user feature set for the requested run date.
df_daily_features = create_daily_features(bronze_table, current_run_date)

# Overwrite restricted by replaceWhere to the rows for this run_date, which is
# what makes re-running the notebook for the same date idempotent.
(
    df_daily_features.write
    .format("delta")
    .mode("overwrite")
    .option("replaceWhere", f"run_date = '{current_run_date}'")
    .saveAsTable(silver_table)
)

print(f"Daily features for {current_run_date} safely upserted to Silver layer")