In [0]:
# Create the "dimension" schema (if it does not exist yet); it holds all the dimension tables (rider, date, time, station)
spark.sql("CREATE SCHEMA IF NOT EXISTS dimension")

# Drop the date dimension table if it already exists, so this notebook can rebuild it
spark.sql("DROP TABLE IF EXISTS dimension.date")

Out[1]: DataFrame[]

In [0]:
from pyspark.sql.types import StringType
from pyspark.sql.functions import *
from pyspark.sql.types import DateType

# Read the trips and payments staging tables
trips_df = spark.table("staging.trips")
payments_df = spark.table("staging.payments")

# Derive the calendar range used to build the date dimension:
#   - lower bound: the earliest payment date
#   - upper bound: the latest trip end date pushed forward by 24*5 = 120 months
# NOTE(review): 120 months is 10 years, while this step was originally described
# as a 5-year offset (which would be 12*5) - confirm the intended horizon.
# `min` / `max` below are the Spark column aggregates brought in by the
# `pyspark.sql.functions` star import, not the Python builtins.
min_dt = payments_df.select(to_date(min("date")).alias("min_date")).collect()[0][0]
max_dt = trips_df.select(add_months(to_date(max("ended_at")), (24 * 5))).collect()[0][0]

# An empty (or all-null) staging table yields None here; interpolating that into
# the SQL below would silently overwrite the dimension with an empty table.
if min_dt is None or max_dt is None:
    raise ValueError(
        "Cannot build the date dimension: no dates found in staging.payments.date "
        "or staging.trips.ended_at"
    )

# Generate one row per calendar day from min date to max date (inclusive)
spark.sql(
    f"select explode(sequence(to_date('{min_dt}'), to_date('{max_dt}'), interval 1 day)) as calendarDate"
).createOrReplaceTempView('dates_dim_temp')

# Derive the calendar attributes of the date dimension from each generated day.
# Spark's dayofweek() is 1-based starting on Sunday (1 = Sunday ... 7 = Saturday).
final_dim = spark.sql("""
    Select
        calendarDate as date
        ,year(calendarDate) as year
        ,month(calendarDate) as month
        ,quarter(calendarDate) as quarter
        ,day(calendarDate) as day
        ,weekofyear(calendarDate) as week
        ,dayofweek(calendarDate) as dayofweek
    From dates_dim_temp
""")

# show() prints the preview itself and returns None, so it must not be wrapped in print()
final_dim.show()

# Save the date dimension as a managed Delta table, replacing any previous contents
final_dim.write.format("delta").mode("overwrite").saveAsTable("dimension.date")

+----------+----+-----+-------+---+----+---------+
|      date|year|month|quarter|day|week|dayofweek|
+----------+----+-----+-------+---+----+---------+
|2013-02-01|2013|    2|      1|  1|   5|        6|
|2013-02-02|2013|    2|      1|  2|   5|        7|
|2013-02-03|2013|    2|      1|  3|   5|        1|
|2013-02-04|2013|    2|      1|  4|   6|        2|
|2013-02-05|2013|    2|      1|  5|   6|        3|
|2013-02-06|2013|    2|      1|  6|   6|        4|
|2013-02-07|2013|    2|      1|  7|   6|        5|
|2013-02-08|2013|    2|      1|  8|   6|        6|
|2013-02-09|2013|    2|      1|  9|   6|        7|
|2013-02-10|2013|    2|      1| 10|   6|        1|
|2013-02-11|2013|    2|      1| 11|   7|        2|
|2013-02-12|2013|    2|      1| 12|   7|        3|
|2013-02-13|2013|    2|      1| 13|   7|        4|
|2013-02-14|2013|    2|      1| 14|   7|        5|
|2013-02-15|2013|    2|      1| 15|   7|        6|
|2013-02-16|2013|    2|      1| 16|   7|        7|
|2013-02-17|2013|    2|      1|