In [0]:
# Drop the rider dimension if it already exists so the rebuild starts clean.
# The name must match the table this notebook writes with saveAsTable
# ("dimension.rider"); the previous "dimension.riders" never matched it.
spark.sql("DROP TABLE IF EXISTS dimension.rider")

Out[3]: DataFrame[]

In [0]:
from pyspark.sql.types import StringType
from pyspark.sql.functions import *
from pyspark.sql.types import DateType

# Read the riders staging table
rider = spark.table("staging.riders")

# Derive rider_type, a categorical column holding either 'Member' or 'Casual'.
# Built-in column expressions are used instead of a Python UDF so the logic
# stays inside Spark's optimizer and avoids per-row Python serialization.
# A null is_member falls through to otherwise() and becomes 'Casual', which
# matches what the former UDF returned for None.
# NOTE(review): assumes is_member is stored as the string 'True'/'False', as
# the previous UDF comparison against 'True' implied - confirm against staging.
rider = rider.withColumn(
    "rider_type",
    when(col("is_member") == "True", "Member").otherwise("Casual"),
)

# Project the columns of the riders dimension, parsing the three date columns
# from 'yyyy-MM-dd' text into DateType values.
final_dim = rider.select(
    "rider_id",
    "first",
    "last",
    "address",
    to_date(rider.birthday, 'yyyy-MM-dd').alias("birthday"),
    to_date(rider.account_start_date, 'yyyy-MM-dd').alias("account_start_date"),
    to_date(rider.account_end_date, 'yyyy-MM-dd').alias("account_end_date"),
    "rider_type",
)

# show() prints the preview itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the cell output.
final_dim.show()

# Save the dimension as a managed Delta table, replacing any previous contents.
final_dim.write.format("delta").mode("overwrite").saveAsTable("dimension.rider")

+--------+-----------+---------+--------------------+----------+------------------+----------------+----------+
|rider_id|      first|     last|             address|  birthday|account_start_date|account_end_date|rider_type|
+--------+-----------+---------+--------------------+----------+------------------+----------------+----------+
|    1001|   Jennifer|    Smith|     397 Diana Ferry|1976-08-10|        2019-11-01|      2020-09-01|    Member|
|    1002|      Karen|    Smith|644 Brittany Row ...|1998-08-10|        2022-02-04|            null|    Member|
|    1003|      Bryan|  Roberts|996 Dickerson Tur...|1999-03-29|        2019-08-26|            null|    Casual|
|    1004|      Jesse|Middleton|7009 Nathan Expre...|1969-04-11|        2019-09-14|            null|    Member|
|    1005|  Christine|Rodriguez|224 Washington Mi...|1974-08-27|        2020-03-24|            null|    Casual|
|    1006|     Alicia|   Taylor|   1137 Angela Locks|2004-01-30|        2020-11-27|      2021-12-01|    