In [0]:
%run "/Workspace/Users/sundarasandeepteja@gmail.com/E-Commerce Analytics Medallion Architecture with GenAI/config/project_config"

In [0]:
# Databricks notebook source
# ======================================
# GOLD LAYER: DIM_DATE (DATE DIMENSION)
# ======================================

# MAGIC %run ../config/project_config

from pyspark.sql import functions as F

# Job banner: announce the target dimension before any Spark work starts.
print("ðŸ¥‡ GOLD LAYER: Creating dim_date")
print("=" * 60)

# ======================================
# GENERATE DATE RANGE
# ======================================
print("\nðŸ“… Generating date range...")

# First and last calendar day covered by the dimension.
start_date, end_date = "2020-01-01", "2026-12-31"

# Date spine: `sequence` builds an array of days from start_date to end_date
# in one-day steps, and `explode` turns that array into one row per day in a
# single column named `date`.
date_spine_query = f"""
    SELECT explode(sequence(
        to_date('{start_date}'), 
        to_date('{end_date}'), 
        interval 1 day
    )) as date
"""
date_df = spark.sql(date_spine_query)

print(f"  Date range: {start_date} to {end_date}")

# ======================================
# ADD DATE ATTRIBUTES
# ======================================
print("\nðŸ”§ Adding date attributes...")

# Every attribute is derived from the `date` column in ONE projection.
# Chaining a separate `withColumn` call per attribute adds one projection to
# the query plan per call, which the PySpark documentation advises against;
# a single `select` yields the same columns in the same order.
#
# Boolean flags are expressed as the condition itself rather than
# `when(cond, True).otherwise(False)`: `date` comes from the generated spine
# and is never NULL, so both forms produce the same True/False values.
dim_date = date_df.select(
    "date",
    # Integer surrogate key in yyyyMMdd form (e.g. 20200101).
    F.date_format("date", "yyyyMMdd").cast("int").alias("date_key"),
    # --- Calendar parts ---
    F.year("date").alias("year"),
    F.quarter("date").alias("quarter"),
    F.month("date").alias("month"),
    F.date_format("date", "MMMM").alias("month_name"),   # full month name
    F.date_format("date", "MMM").alias("month_short"),   # abbreviated month name
    # ISO-8601 week number: early-January days can belong to week 52/53 of
    # the previous ISO year, so do not pair this blindly with `year`.
    F.weekofyear("date").alias("week_of_year"),
    F.dayofmonth("date").alias("day_of_month"),
    # Spark numbering: 1 = Sunday, 2 = Monday, ..., 7 = Saturday.
    F.dayofweek("date").alias("day_of_week"),
    F.dayofyear("date").alias("day_of_year"),
    F.date_format("date", "EEEE").alias("day_name"),     # full day name
    F.date_format("date", "EEE").alias("day_short"),     # abbreviated day name
    # --- Weekend / weekday flags (Sunday = 1, Saturday = 7) ---
    F.dayofweek("date").isin(1, 7).alias("is_weekend"),
    F.dayofweek("date").isin(2, 3, 4, 5, 6).alias("is_weekday"),
    # --- Period labels ---
    F.date_format("date", "yyyy-MM").alias("year_month"),
    # e.g. "2020-Q1"; the integer parts are cast to string by `concat`.
    F.concat(F.year("date"), F.lit("-Q"), F.quarter("date")).alias("year_quarter"),
    # --- Fiscal calendar: the fiscal year starts on 1 July and is labelled
    # with the calendar year in which it ends (Jul 2020 -> FY 2021). ---
    F.when(F.month("date") >= 7, F.year("date") + 1)
     .otherwise(F.year("date"))
     .alias("fiscal_year"),
    # Jul-Sep = Q1, Oct-Dec = Q2, Jan-Mar = Q3, Apr-Jun = Q4.
    F.when(F.month("date").isin(7, 8, 9), 1)
     .when(F.month("date").isin(10, 11, 12), 2)
     .when(F.month("date").isin(1, 2, 3), 3)
     .otherwise(4)
     .alias("fiscal_quarter"),
    # --- Month boundary flags ---
    (F.dayofmonth("date") == 1).alias("is_month_start"),
    (F.last_day("date") == F.col("date")).alias("is_month_end"),
    # Audit column: timestamp of the query that materialises the rows.
    F.current_timestamp().alias("_loaded_at"),
)

print("  âœ… Added 20+ date attributes")

# ======================================
# WRITE TO GOLD
# ======================================
print("\nðŸ’¾ Writing to Gold layer...")

# Full refresh: replace the table's data and, via `overwriteSchema`, its
# schema as well, so re-running the notebook is safe after column changes.
# GOLD_DIM_DATE_TABLE is not defined in this cell; presumably it is provided
# by the project_config notebook loaded via %run -- confirm there.
(
    dim_date.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(GOLD_DIM_DATE_TABLE)
)

# Report on what was actually persisted: read the table back instead of
# re-evaluating the in-memory `dim_date` plan. This verifies the write and
# avoids recomputing the whole lineage for the count and the sample.
dim_date_written = spark.table(GOLD_DIM_DATE_TABLE)

print(f"  âœ… Created: {GOLD_DIM_DATE_TABLE}")
print(f"  ðŸ“Š Records: {dim_date_written.count():,}")

# Show sample: the earliest 10 dates. The explicit ordering keeps the preview
# deterministic, since rows read from a table carry no guaranteed order.
print("\nðŸ“‹ Sample Records:")
dim_date_written.select(
    "date", "date_key", "year", "month_name", "day_name", "is_weekend"
).orderBy("date").show(10)

print("\nðŸ¥‡ DIM_DATE COMPLETE!")