In [0]:
# Remove any existing time dimension table (dimension.time) so the cell
# below can rebuild it; IF EXISTS makes this a no-op on the first run.
drop_time_dim_sql = "DROP TABLE IF EXISTS dimension.time"
spark.sql(drop_time_dim_sql)

Out[1]: DataFrame[]

In [0]:
from pyspark.sql.types import StringType
from pyspark.sql.functions import *
from pyspark.sql.types import DateType

# Arbitrary anchor date: only the time-of-day part is meaningful for this
# dimension, but a full date is needed to build a parseable timestamp string.
default_date = '2014-01-01'

# One single-element row per second of the day, from 00:00:00 through
# 23:59:59 (24 * 60 * 60 = 86,400 rows), in chronological order.
# A comprehension keeps the loop variables out of the notebook namespace.
datetime_list = [
    ['{:s} {:02d}:{:02d}:{:02d}'.format(default_date, hr, mint, sec)]
    for hr in range(24)
    for mint in range(60)
    for sec in range(60)
]

dfColumns = ["datetime"]
df = spark.createDataFrame(data=datetime_list, schema=dfColumns)

# Parse the generated strings into a real timestamp column, then derive the
# time dimension attributes (formatted time plus hour/minute/second parts).
final_dim = (
    df.select(to_timestamp(df.datetime).alias("input_datetime"))
    .withColumn("time", date_format('input_datetime', 'HH:mm:ss'))
    .withColumn("hour", hour(col("input_datetime")))
    .withColumn("minute", minute(col("input_datetime")))
    .withColumn("second", second(col("input_datetime")))
)

# Preview only the derived attributes, without truncating cell values.
final_dim.select(["time", "hour", "minute", "second"]).show(truncate=False)

# Preview the full dimension, including the source timestamp.
# show() prints the table itself and returns None, so it must not be wrapped
# in print() -- that would emit a stray "None" line after the table.
final_dim.show()

# Persist the dimension as a Delta table, replacing any existing contents.
final_dim.write.format("delta").mode("overwrite").saveAsTable("dimension.time")

+--------+----+------+------+
|time    |hour|minute|second|
+--------+----+------+------+
|00:00:00|0   |0     |0     |
|00:00:01|0   |0     |1     |
|00:00:02|0   |0     |2     |
|00:00:03|0   |0     |3     |
|00:00:04|0   |0     |4     |
|00:00:05|0   |0     |5     |
|00:00:06|0   |0     |6     |
|00:00:07|0   |0     |7     |
|00:00:08|0   |0     |8     |
|00:00:09|0   |0     |9     |
|00:00:10|0   |0     |10    |
|00:00:11|0   |0     |11    |
|00:00:12|0   |0     |12    |
|00:00:13|0   |0     |13    |
|00:00:14|0   |0     |14    |
|00:00:15|0   |0     |15    |
|00:00:16|0   |0     |16    |
|00:00:17|0   |0     |17    |
|00:00:18|0   |0     |18    |
|00:00:19|0   |0     |19    |
+--------+----+------+------+
only showing top 20 rows

+-------------------+--------+----+------+------+
|     input_datetime|    time|hour|minute|second|
+-------------------+--------+----+------+------+
|2014-01-01 00:00:00|00:00:00|   0|     0|     0|
|2014-01-01 00:00:01|00:00:01|   0|     0|     1|
|201