### Importing the modules

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

### Defining the start and end dates of the date range

In [0]:
# Inclusive bounds of the calendar range, as ISO "yyyy-MM-dd" strings.
start_date, end_date = "2015-01-01", "2030-12-31"

### Building the date dimension with business-related calendar columns

In [0]:
# Date spine: one row per calendar day from start_date through end_date,
# produced by exploding a Spark SQL sequence that steps one day at a time.
calendar_days = spark.sql(f"""
        SELECT explode(sequence(
            to_date('{start_date}'),
            to_date('{end_date}'),
            interval 1 day
        )) AS date
    """)

# dayofweek() numbers the days 1 = Sunday through 7 = Saturday, so the
# weekend test below checks for 1 and 7.
day_of_week_col = dayofweek("date")

# Derive every calendar attribute from the single "date" column in one
# projection. The column order is: date, surrogate key, then attributes.
dim_date_df = calendar_days.select(
    col("date"),
    # Integer surrogate key in yyyyMMdd form, e.g. 20150101.
    date_format(col("date"), "yyyyMMdd").cast("int").alias("date_sk"),
    dayofmonth("date").alias("day"),
    month("date").alias("month"),
    date_format("date", "MMMM").alias("month_name"),
    quarter("date").alias("quarter"),
    year("date").alias("year"),
    day_of_week_col.alias("day_of_week"),
    date_format("date", "EEEE").alias("day_name"),
    weekofyear("date").alias("week_of_year"),
    day_of_week_col.isin(1, 7).alias("is_weekend"),
)

### Creating the dim_date table

In [0]:
# DDL for the Delta table retail_analytics.gold.dim_date. The column names
# match the columns of dim_date_df; IF NOT EXISTS makes re-running this cell
# a no-op once the table is in place.
dim_date_ddl = """
CREATE TABLE IF NOT EXISTS retail_analytics.gold.dim_date (
    date_sk INT,
    date DATE,
    day INT,
    month INT,
    month_name STRING,
    quarter INT,
    year INT,
    day_of_week INT,
    day_name STRING,
    week_of_year INT,
    is_weekend BOOLEAN
)
USING DELTA
"""

spark.sql(dim_date_ddl)

DataFrame[]

In [0]:
# Load the date dimension idempotently: append only the dates whose date_sk
# is not already in the table. A plain append of the full DataFrame would
# insert every row again each time this cell is re-run, leaving duplicate
# date_sk keys in the dimension.
dim_date_table = "retail_analytics.gold.dim_date"

# Keys already stored in the target table (empty on the very first run).
existing_date_keys = spark.read.table(dim_date_table).select("date_sk")

# left_anti keeps only the rows of dim_date_df with no matching date_sk.
missing_dates_df = dim_date_df.join(existing_date_keys, on="date_sk", how="left_anti")

missing_dates_df.write.mode("append").saveAsTable(dim_date_table)

In [0]:
# Quick sanity check: render up to five rows of the stored dimension.
# No ordering is applied, so which rows appear is up to Spark.
dim_date_preview = spark.read.table("retail_analytics.gold.dim_date").limit(5)
dim_date_preview.display()

date_sk,date,day,month,month_name,quarter,year,day_of_week,day_name,week_of_year,is_weekend
20150101,2015-01-01,1,1,January,1,2015,5,Thursday,1,False
20150102,2015-01-02,2,1,January,1,2015,6,Friday,1,False
20150103,2015-01-03,3,1,January,1,2015,7,Saturday,1,True
20150104,2015-01-04,4,1,January,1,2015,1,Sunday,1,True
20150105,2015-01-05,5,1,January,1,2015,2,Monday,2,False


In [0]:
# Row count of the stored dimension: one row per calendar day is expected
# when the table has been loaded exactly once.
dim_date_stored = spark.read.table("retail_analytics.gold.dim_date")
dim_date_stored.count()

5844