### Importing the Needed Modules

In [0]:
import os
import sys

# Absolute location of the repository checkout inside the workspace.
PROJECT_ROOT = "/Workspace/Users/thiruvengadamk16@gmail.com/Retail-And-Ecommerce-Analytics-Platform"

# Put the repo root at the front of the module search path so the `src` package
# resolves; the insert is skipped when the path is already registered.
if sys.path.count(PROJECT_ROOT) == 0:
    sys.path.insert(0, PROJECT_ROOT)

# Diagnostic output only: shows where the notebook is running from and confirms
# the repo root is on the search path.
print("Current working directory:", os.getcwd())
print("Repo root added to path:", PROJECT_ROOT in sys.path)

# NOTE(review): wildcard import - every public name of src.paths lands in the
# notebook namespace, so the origin of those names is not visible here.
from src.paths import *

In [0]:
# Spark column functions and data types used by the cells below.
# NOTE(review): these wildcard imports pull every exported name into the notebook
# namespace; pyspark.sql.functions exports names such as sum, min, max, abs and
# round, which then shadow the Python builtins of the same name.
from pyspark.sql.functions import *
from pyspark.sql.types import *
import sys

# Make the `src` package importable even when this cell is run on its own.
# The membership test keeps the cell idempotent: re-running it (or running it
# after another cell has already registered the same path) no longer appends a
# duplicate entry to sys.path.
_repo_root = "/Workspace/Users/thiruvengadamk16@gmail.com/Retail-And-Ecommerce-Analytics-Platform"
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

# Project-local definitions: the target table identifier and the expected schema.
from src.paths import DIM_DATES_PATH
from src.schema_definitions import DIM_DATES_SCHEMA

### Creating a DataFrame with start and end date

In [0]:
# First and last calendar day of the dimension, as ISO-formatted date strings.
start_date = "2020-01-01"
end_date = "2029-12-31"

# One-row DataFrame carrying both bounds in the string columns `start` and `end`.
dates_df = spark.createDataFrame(
    data=[(start_date, end_date)],
    schema=["start", "end"],
)

### Creating a date column covering every day between the start and end dates

In [0]:
# Expand the start/end bounds into one row per calendar day.
#
# The one-row bounds frame is rebuilt here under its own name instead of being
# read from `dates_df` and then overwritten by the result. With the previous
# self-referential form, running this cell a second time failed because
# `dates_df` no longer had the `start`/`end` columns; now the cell can be
# re-run any number of times and always yields the same `dates_df`.
date_bounds_df = spark.createDataFrame([(start_date, end_date)], ["start", "end"])

dates_df = date_bounds_df.select(
    explode(
        # sequence() over two dates steps one day at a time and includes both ends;
        # explode() then turns the resulting array into one row per element.
        sequence(
            to_date(col("start")),
            to_date(col("end"))
        )
    ).alias("date")
)

### Adding Business-Related Columns

In [0]:
# Derive the calendar attributes from `date` in a single projection.
# "*" carries over the existing columns first; the derived columns follow in
# the order listed, each computed from `date` alone.
dim_dates_df = dates_df.select(
    "*",
    # Integer surrogate key in yyyyMMdd form, e.g. 20200101.
    date_format(col("date"), "yyyyMMdd").cast("int").alias("date_sk"),
    dayofmonth(col("date")).alias("day"),
    month(col("date")).alias("month"),
    date_format(col("date"), "MMMM").alias("month_name"),
    quarter(col("date")).alias("quarter"),
    year(col("date")).alias("year"),
    # Spark's dayofweek() numbers days 1 (Sunday) through 7 (Saturday).
    dayofweek(col("date")).alias("day_of_week"),
    date_format(col("date"), "EEEE").alias("day_name"),
    # Weekend flag: True for day-of-week 1 or 7, False otherwise.
    when(dayofweek(col("date")).isin(1, 7), True).otherwise(False).alias("is_weekend"),
)


### Dim_dates Schema for Reference

In [0]:
# Bare expression: renders the imported schema definition as the cell output so
# it can be compared by eye with the columns of dim_dates_df.
DIM_DATES_SCHEMA

### Selecting needed columns

In [0]:
# Project the frame down to the dimension's columns, in their final order
# (surrogate key first, then the date and its derived attributes).
dim_dates_df = dim_dates_df.select([
    "date_sk", "date",
    "day", "month", "month_name", "quarter", "year",
    "day_of_week", "day_name", "is_weekend",
])

### Schema Enforcement Check

In [0]:
# Compare the DataFrame's column names with the keys of DIM_DATES_SCHEMA in both
# directions, and stop the notebook before the table write if they disagree.
expected_cols = set(DIM_DATES_SCHEMA.keys())
incoming_cols = set(dim_dates_df.columns)

# Columns present in the DataFrame that the schema definition does not list.
unknown_cols = incoming_cols - expected_cols
# Columns the schema definition lists that the DataFrame does not provide.
missing_cols = expected_cols - incoming_cols

print("Unknown columns in dim_dates:", unknown_cols)
print("Missing columns in dim_dates:", missing_cols)

# Fail fast: a print alone would let the overwrite below proceed with a
# mismatched column set.
if unknown_cols or missing_cols:
    raise ValueError(
        "dim_dates schema mismatch: "
        f"unknown={sorted(unknown_cols)}, missing={sorted(missing_cols)}"
    )


### Creating Gold Dim_dates Table

In [0]:
# Persist the dimension as a Delta table under the name held in DIM_DATES_PATH;
# "overwrite" mode replaces whatever the table contained before.
dim_dates_df.write.saveAsTable(
    DIM_DATES_PATH,
    format="delta",
    mode="overwrite",
)


In [0]:
# Read the table back and render its first five rows as a quick sanity check.
spark.table(DIM_DATES_PATH).limit(5).display()

In [0]:
# Row count of the stored table, shown as the cell output.
spark.table(DIM_DATES_PATH).count()