###### Gold Dimension – Date
Create a conformed date dimension with deterministic surrogate keys to support time-based analytics across all fact tables.

In [0]:
from pyspark.sql.functions import (
    col, year, quarter, month, dayofmonth,
    dayofweek, date_format, lit,  explode, sequence, to_date, expr
)
from pyspark.sql.types import DateType


In [0]:
# Bounds of the date range covered by the dimension, as yyyy-MM-dd strings.
START_DATE, END_DATE = "2024-01-01", "2025-12-31"


###### Generate Date Spine

Generates a continuous range of calendar dates to serve as the backbone of the date dimension.

In [0]:
# Array column holding one entry per day from START_DATE to END_DATE,
# stepping by a one-day interval.
_calendar_days = sequence(
    to_date(lit(START_DATE)),
    to_date(lit(END_DATE)),
    expr("interval 1 day"),
)

# spark.range(1) supplies a single anchor row; exploding the array on that row
# yields one output row per calendar day in the column "full_date".
df_date = spark.range(1).select(explode(_calendar_days).alias("full_date"))

###### Derive Date Attributes and Surrogate Key

Enriches the date spine with standard calendar attributes and assigns a deterministic surrogate key for fact table joins.

In [0]:
# Shared column handles so every attribute is derived from the same source date.
_full_date = col("full_date")
_weekday_no = dayofweek(_full_date)  # Spark numbering: 1 = Sunday ... 7 = Saturday

df_dim_date = df_date.select(
    "*",
    # Surrogate key: the date rendered as yyyyMMdd and cast to an integer,
    # so the same date always maps to the same key.
    date_format(_full_date, "yyyyMMdd").cast("int").alias("date_key"),
    # Calendar attributes.
    year(_full_date).alias("year"),
    quarter(_full_date).alias("quarter"),
    month(_full_date).alias("month"),
    dayofmonth(_full_date).alias("day"),
    _weekday_no.alias("day_of_week"),
    # Weekend flag: true for Sunday (1) and Saturday (7).
    _weekday_no.isin([1, 7]).alias("is_weekend"),
)

###### Persist Date Dimension

Writes the conformed date dimension to the Gold layer as a Delta table for reuse across all fact models.

In [0]:
# Storage location of the date dimension inside the "gold" container.
GOLD_DIM_DATE_PATH = "wasbs://gold@flightdatastorage.blob.core.windows.net/dim_date/"

# Write the dimension in Delta format; "overwrite" mode replaces whatever
# data already exists at the target path.
df_dim_date.write.save(GOLD_DIM_DATE_PATH, format="delta", mode="overwrite")