In [1]:

from pyspark.sql import SparkSession
from delta import configure_spark_with_delta_pip
import ConnectionConfigKaloyan as cc
# Run the project's environment setup helper before any Spark session is created.
# NOTE(review): what it configures lives in ConnectionConfigKaloyan — not visible here.
cc.setupEnvironment()


In [2]:
# Start the Spark session through the project helper.
# NOTE(review): "DIM_VEHICLE" is presumably the application name and 4 the
# number of local cores/partitions — confirm against ConnectionConfigKaloyan.
spark = cc.startLocalCluster("DIM_VEHICLE",4)
# Last expression of the cell, so the notebook renders the active session.
spark.getActiveSession()

In [3]:
# Load the source table `bike_types` over JDBC into a Spark DataFrame.
#
# Credentials are passed under the option keys "user" and "password" — the
# names the Spark JDBC data source forwards to the driver. The values come
# from the project configuration, so no secret appears in the notebook.
#
# The read is parallelised on `biketypeid` with 4 partitions. lowerBound and
# upperBound only determine the partition stride; they do not filter rows.
biketype_df = (
    spark.read
    .format("jdbc")
    .option("driver", cc.get_Property("driver"))
    .option("url", cc.create_jdbc())
    .option("dbtable", "bike_types")
    .option("user", cc.get_Property("username"))
    .option("password", cc.get_Property("password"))
    .option("partitionColumn", "biketypeid")
    .option("numPartitions", 4)
    .option("lowerBound", 0)
    .option("upperBound", 20)
    .load()
)


In [4]:
# Register the DataFrame as the temporary view "dimBiketype" so it can be
# queried with Spark SQL; an existing view with that name is replaced.

biketype_df.createOrReplaceTempView("dimBiketype")


In [5]:
# Build the vehicle dimension by selecting its columns from the bike-type
# temporary view. No join is involved: the dimension has a single source view.
# The view name is spelled exactly as it was registered ("dimBiketype") so the
# query does not rely on case-insensitive identifier resolution.
vehicle_dimension_df = spark.sql("""
    SELECT
        bt.biketypeid,
        bt.biketypedescription
    FROM
        dimBiketype bt
""")

In [6]:
# Inspect the dimension: print its schema, then show its rows
# (show() prints at most 20 rows by default).
vehicle_dimension_df.printSchema()
vehicle_dimension_df.show()

root
 |-- biketypeid: integer (nullable = true)
 |-- biketypedescription: string (nullable = true)

+----------+-------------------+
|biketypeid|biketypedescription|
+----------+-------------------+
|         1|          Velo Bike|
|         2|        Velo E-Bike|
|         3|               Step|
|         4|            Scooter|
+----------+-------------------+



In [7]:
# Save the vehicle dimension as a table, overwriting any existing table.
# Delta variant (currently disabled):
#vehicle_dimension_df.write.format("delta").mode("overwrite").option("mergeSchema", "true").saveAsTable("dimVehicle")

# Parquet variant: repartition(1) collapses the data to one partition before the write.
vehicle_dimension_df.repartition(1).write.format("parquet").mode("overwrite").saveAsTable("dimVehicle_pq")

In [9]:
# Stop the Spark session now that the dimension has been written.
spark.stop()