In [8]:
from pyspark.sql import functions as F

# Incrementally maintain the `dim_date` calendar dimension (Delta table).
#
# First run: creates the table with one row per day from FIRST_DATE through
# today. Later runs: append only the days after the latest stored date, up to
# and including today. Re-running on the same day is a no-op.
DIM_DATE_TABLE = "dim_date"
FIRST_DATE = "2010-01-01"  # first calendar day generated on an initial load

# 1. Check if dim_date exists
tables = [t.name for t in spark.catalog.listTables()]
table_exists = DIM_DATE_TABLE in tables

end_date = F.current_date()

if table_exists:
    existing = spark.table(DIM_DATE_TABLE)
    # MAX over an empty table yields NULL, which arrives here as None.
    max_date = existing.agg(F.max("Date")).first()[0]
else:
    existing = None
    max_date = None

if max_date is None:
    # Either the table is missing or it exists but holds no rows. In both
    # cases start from the beginning; F.least skips NULLs, so feeding it a
    # NULL max_date would silently shrink the window to "today only".
    start_date = F.to_date(F.lit(FIRST_DATE))
else:
    # Prevent generating dates beyond today. F.sequence counts *downward*
    # when start > stop, so an unclamped start (max_date + 1 > today) would
    # produce tomorrow as well as today.
    start_date = F.least(F.date_add(F.lit(max_date), 1), end_date)

# 2. Generate candidate dates: a single-row frame whose array column holds
#    every day in [start_date, end_date], exploded to one row per day.
df = (
    spark.range(0, 1)
    .select(F.sequence(start_date, end_date).alias("Date"))
    .withColumn("Date", F.explode("Date"))
)

# 2b. Safety-Net: never write duplicates
if table_exists:
    existing_dates = existing.select("Date")
    df = df.join(existing_dates, on="Date", how="left_anti")

# head(1) fetches at most one row, avoiding the DataFrame -> RDD conversion
# that df.rdd.isEmpty() would trigger.
if not df.head(1):
    print("No new dates to add.")
else:
    # 3. Derive the calendar attributes for the new dates.
    df = (
        df
        .withColumn("Year", F.year("Date"))
        .withColumn("Month", F.month("Date"))
        .withColumn("MonthName", F.date_format("Date", "MMMM"))
        .withColumn("MonthShort", F.date_format("Date", "MMM"))
        # quarter() returns an integer; cast explicitly instead of relying on
        # concat's implicit string coercion.
        .withColumn("Quarter", F.concat(F.lit("Q"), F.quarter("Date").cast("string")))
        # dayofweek(): 1 = Sunday ... 7 = Saturday
        .withColumn("Weekday", F.dayofweek("Date"))
        .withColumn("WeekdayName", F.date_format("Date", "EEEE"))
        .withColumn("IsWeekend", F.dayofweek("Date").isin(1, 7))
    )

    # Append to an existing table; create it on the first run.
    write_mode = "append" if table_exists else "overwrite"
    df.write.format("delta").mode(write_mode).saveAsTable(DIM_DATE_TABLE)
    print("dim_date updated.")


StatementMeta(, 8fb086a9-aab7-428e-b8fd-10c15cf7314e, 10, Finished, Available, Finished)

No new dates to add.


In [7]:
from pyspark.sql import functions as F

# Sanity check: display the latest date currently stored in dim_date.
max_date_col = F.max("Date").alias("max_date")
spark.table("dim_date").select(max_date_col).show()


StatementMeta(, 8fb086a9-aab7-428e-b8fd-10c15cf7314e, 9, Finished, Available, Finished)

+----------+
|  max_date|
+----------+
|2026-01-18|
+----------+

