# Gold Dimension: Date
This notebook generates the static calendar dimension for the Gold layer.

In [None]:
from pyspark.sql.functions import col, lit, when, year, month, quarter, weekofyear, dayofweek, dayofmonth, date_format, last_day, expr
from pyspark.sql.types import IntegerType

In [None]:
# UNITY CATALOG CONFIG (MANDATORY)
# Three-level namespace of the dimension table: catalog.schema.table.
CATALOG, SCHEMA, TABLE = "workspace", "default", "dim_date"

# Point the session at the target catalog and schema, creating the schema
# first if it does not exist yet. Order matters: the catalog must be selected
# before the schema is created/selected inside it.
for statement in (
    f"USE CATALOG {CATALOG}",
    f"CREATE SCHEMA IF NOT EXISTS {SCHEMA}",
    f"USE SCHEMA {SCHEMA}",
):
    spark.sql(statement)

# Fully qualified name used when writing and validating the table.
TARGET_TABLE = f"{CATALOG}.{SCHEMA}.{TABLE}"

In [None]:
# DATE RANGE CONFIGURATION
# Inclusive bounds of the calendar, as ISO-8601 (yyyy-MM-dd) strings; they are
# interpolated into the to_date(...) calls of the date-sequence query.
DATE_START = "2022-01-01"
DATE_END = "2026-12-31"
print(f"📅 Generating Date Dimension from {DATE_START} to {DATE_END}")

In [None]:
# GENERATE DATE SEQUENCE
# sequence() builds an array of dates stepping one day at a time from
# DATE_START to DATE_END; explode() turns that array into one row per day
# in a single column named `date`.
date_sequence_query = f"""
    SELECT explode(
        sequence(
            to_date('{DATE_START}'),
            to_date('{DATE_END}'),
            interval 1 day
        )
    ) AS date
"""
df_dates = spark.sql(date_sequence_query)

In [None]:
# ENRICH DATE ATTRIBUTES
# Every attribute is derived from the `date` column alone, so all of them are
# projected in one select rather than through a chain of withColumn calls.
date_col = col("date")
day_of_week_col = dayofweek(date_col)  # Spark numbering: 1 = Sunday ... 7 = Saturday

df_dim_date = df_dates.select(
    date_col,
    # Integer surrogate key in yyyyMMdd form.
    date_format(date_col, "yyyyMMdd").cast(IntegerType()).alias("date_sk"),
    # Numeric calendar parts.
    year(date_col).alias("year"),
    quarter(date_col).alias("quarter"),
    month(date_col).alias("month"),
    # NOTE(review): weekofyear is documented as ISO-8601 week numbering, so
    # days around New Year can carry week 52/53/1 that belongs to the
    # neighbouring ISO year while `year` stays the calendar year.
    weekofyear(date_col).alias("week"),
    dayofmonth(date_col).alias("day_of_month"),
    day_of_week_col.alias("day_of_week"),
    # Display labels.
    date_format(date_col, "yyyy-MM").alias("year_month"),
    expr("concat(year(date), '-Q', quarter(date))").alias("year_quarter"),
    date_format(date_col, "MMMM").alias("month_name"),
    date_format(date_col, "MMM").alias("month_name_short"),
    date_format(date_col, "EEEE").alias("day_name"),
    date_format(date_col, "EEE").alias("day_name_short"),
    # Boolean flags; when/otherwise pins the result to True/False.
    when(day_of_week_col.isin(1, 7), lit(True))
    .otherwise(lit(False))
    .alias("is_weekend"),
    when(date_col == last_day(date_col), lit(True))
    .otherwise(lit(False))
    .alias("is_month_end"),
    when(dayofmonth(date_col) == 1, lit(True))
    .otherwise(lit(False))
    .alias("is_month_start"),
    # The fiscal calendar currently mirrors the calendar year and quarter.
    year(date_col).alias("fiscal_year"),
    quarter(date_col).alias("fiscal_quarter"),
)

In [None]:
# SELECT FINAL COLUMN ORDER
# Published column layout of the dimension: surrogate key first, then the
# date itself, numeric parts, display labels, boolean flags and fiscal fields.
final_columns = [
    "date_sk", "date",
    "year", "quarter", "month", "week", "day_of_month", "day_of_week",
    "year_month", "year_quarter",
    "month_name", "month_name_short", "day_name", "day_name_short",
    "is_weekend", "is_month_end", "is_month_start",
    "fiscal_year", "fiscal_quarter",
]
df_dim_date = df_dim_date.select(*final_columns)

In [None]:
# WRITE TO GOLD (FULL REFRESH)
# The dimension is regenerated from scratch on every run, so the Delta table
# is overwritten; overwriteSchema lets the write replace the table schema too
# when the set of columns changes.
(
    df_dim_date.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(TARGET_TABLE)
)
print(f"✅ Gold Date Dimension written to {TARGET_TABLE}")

In [None]:
# ZERO-COMPUTE VALIDATION
# Reads the most recent entry of the table's Delta history and reports its
# operation metrics instead of scanning the table to count rows.
# NOTE(review): assumes the latest history entry is the write performed by
# this notebook — confirm if other jobs can touch the table in between.
history = spark.sql(f"DESCRIBE HISTORY {TARGET_TABLE} LIMIT 1").first()
metrics = history["operationMetrics"]

rule = "=" * 60
report_lines = [
    rule,
    "GOLD DIM_DATE VALIDATION",
    rule,
    f"Operation      : {history['operation']}",
    f"Timestamp      : {history['timestamp']}",
    # Metrics missing for the recorded operation are shown as N/A.
    f"Rows Written   : {metrics.get('numOutputRows', 'N/A')}",
    f"Files Written  : {metrics.get('numFiles', 'N/A')}",
    rule,
]
for report_line in report_lines:
    print(report_line)