In [16]:
from pyspark.sql import functions as F

# Incrementally maintain the `dim_date` Delta table: on every run, append one
# row per calendar day from the day after the latest stored date (or from
# 2010-01-01 on a fresh environment) up to and including today.

# Check whether dim_date is listed in the current catalog database
tables = [t.name for t in spark.catalog.listTables()]
table_exists = "dim_date" in tables

# Today as a Column expression. current_date() is evaluated in the Spark
# session time zone, so it is only a UTC date when the session time zone
# (spark.sql.session.timeZone) is set to UTC.
today = F.current_date()

# Load the existing table (if any) and find its latest non-future date
existing = None
max_date = None
if table_exists:
    # Ignore future-dated rows for this run's calculations.
    # NOTE(review): this filter only affects the DataFrame. Rows with
    # Date > today that are already stored in dim_date stay in the table when
    # the write below uses "append" - confirm whether they should be deleted.
    existing = spark.table("dim_date").filter(F.col("Date") <= today)

    # max() over zero rows is NULL (None in Python), so a single aggregation
    # both detects the "empty after cleanup" case and yields the latest date,
    # instead of running count() and max() as two separate jobs.
    max_date = existing.agg(F.max("Date")).first()[0]
    if max_date is None:
        # Nothing usable in the table -> treat as new
        table_exists = False


# Determine start_date
if table_exists:
    # Next day after the latest stored date
    start_date = F.date_add(F.lit(max_date), 1)
else:
    # Fresh environment -> start from 2010
    start_date = F.to_date(F.lit("2010-01-01"))

# Clamp start_date to today (never generate future dates). When the table is
# already current, max_date + 1 is tomorrow; without the clamp, sequence()
# would step backwards from tomorrow to today and emit a future date.
start_date = F.least(start_date, today)

# Generate candidate dates: one row per day in [start_date, today]
df = spark.range(1).select(
    F.explode(F.sequence(start_date, today)).alias("Date")
)

# Remove dates that are already stored (anti-join). This is what drops
# "today" again when the clamp above collapsed the range onto a stored date.
if table_exists:
    existing_dates = existing.select("Date")
    df = df.join(existing_dates, on="Date", how="left_anti")


# Stop if nothing to add. head(1) fetches at most one row through the
# DataFrame API and avoids the DataFrame -> RDD conversion of df.rdd.isEmpty().
if not df.head(1):
    print("No new dates to add.")
else:

    # Add calendar attributes derived from Date
    df = (
        df
        .withColumn("Year", F.year("Date"))
        .withColumn("Month", F.month("Date"))
        .withColumn("MonthName", F.date_format("Date", "MMMM"))
        .withColumn("MonthShort", F.date_format("Date", "MMM"))
        .withColumn("Quarter", F.concat(F.lit("Q"), F.quarter("Date")))
        # dayofweek() numbers days 1 (Sunday) through 7 (Saturday)
        .withColumn("Weekday", F.dayofweek("Date"))
        .withColumn("WeekdayName", F.date_format("Date", "EEEE"))
        .withColumn("IsWeekend", F.dayofweek("Date").isin(1, 7))
    )

    # Write to Delta: append to a populated table, otherwise (table missing or
    # holding no non-future dates) create/replace it.
    write_mode = "append" if table_exists else "overwrite"

    df.write.format("delta").mode(write_mode).saveAsTable("dim_date")

    print("dim_date updated.")


StatementMeta(, 8fb086a9-aab7-428e-b8fd-10c15cf7314e, 18, Finished, Available, Finished)

dim_date updated.
