# Config

In [1]:
from delta import DeltaTable
from datetime import datetime

import ConnectionConfig as cc
# Project helper from ConnectionConfig, called once before any Spark work.
# NOTE(review): presumably prepares the local environment Spark needs - confirm in ConnectionConfig.
cc.setupEnvironment()

# Start local cluster

In [2]:
# Obtain the Spark session through the project helper.
# NOTE(review): "DIM_LOCK" is presumably used as the application name - confirm in ConnectionConfig.
spark = cc.startLocalCluster("DIM_LOCK")
# Last expression of the cell, so the notebook displays the active session.
spark.getActiveSession()

# Ophalen van gegevens (en opslaan in data warehouse)

In [3]:
# Extract
# Select the connection profile; the cc.get_Property / cc.create_jdbc calls
# below are made after this call, exactly as in the original cell.
cc.set_connectionProfile("VeloDB")


def read_jdbc_table(table, partition_column, upper_bound, lower_bound=0, num_partitions=4):
    """Read one source table over JDBC as a partitioned Spark DataFrame.

    Args:
        table: Table name passed as the JDBC ``dbtable`` option.
        partition_column: Column passed as ``partitionColumn`` to split the read.
        upper_bound: Value for the ``upperBound`` option.
        lower_bound: Value for the ``lowerBound`` option.
        num_partitions: Value for the ``numPartitions`` option.

    Returns:
        The DataFrame produced by ``spark.read...load()`` for ``table``.

    Note:
        Per the Spark JDBC documentation, the bounds only determine the
        partition stride; they do not filter rows. If the ids grow far beyond
        ``upper_bound`` the last partition simply becomes larger.
    """
    return (
        spark.read
        .format("jdbc")
        .option("driver", cc.get_Property("driver"))
        .option("url", cc.create_jdbc())
        .option("dbtable", table)
        .option("user", cc.get_Property("username"))
        .option("password", cc.get_Property("password"))
        .option("partitionColumn", partition_column)
        .option("numPartitions", num_partitions)
        .option("lowerBound", lower_bound)
        .option("upperBound", upper_bound)
        .load()
    )


# Same tables, partition columns and bounds as before, now via one helper.
df_lock = read_jdbc_table("locks", partition_column="lockid", upper_bound=8000)
df_station = read_jdbc_table("stations", partition_column="stationid", upper_bound=350)

# Preview a few rows of each source table.
df_lock.show(5)
df_station.show(5)

+------+-------------+---------+---------+
|lockid|stationlocknr|stationid|vehicleid|
+------+-------------+---------+---------+
|     1|            1|        1|     NULL|
|     2|            2|        1|     NULL|
|     3|            3|        1|     NULL|
|     4|            4|        1|     NULL|
|     5|            5|        1|     NULL|
+------+-------------+---------+---------+
only showing top 5 rows

+---------+--------+---------+------------+--------------------+------+-------+---------+-----------------+-------------------+-------+------+
|stationid|objectid|stationnr|        type|              street|number|zipcode| district|         gpscoord|     additionalinfo|labelid|cityid|
+---------+--------+---------+------------+--------------------+------+-------+---------+-----------------+-------------------+-------+------+
|        1|   33202|      026|DUBBELZIJDIG|         Meir (2000)|    84|   2000|ANTWERPEN|(51.2182,4.41241)|                   |   NULL|  NULL|
|        2|   33

In [4]:
# Transform
# Expose the extracted DataFrames to Spark SQL under temporary view names.
df_lock.createOrReplaceTempView("source_Lock")
df_station.createOrReplaceTempView("source_Station")

# Query the data: one row per lock, enriched with the attributes of its station.
# The inner join keeps only locks whose stationid matches a row in source_Station.
# The query is a plain triple-quoted string: it has no placeholders, so no
# f-prefix is needed, and tokens no longer rely on line-continuation whitespace.
# NOTE(review): uuid() is evaluated when the DataFrame is executed, so lock_SK
# values are not stable across executions - the notebook outputs show the same
# key prefix attached to different lockids in two show() calls. Confirm this is
# acceptable for consumers that reference lock_SK.
df_dim_lock = spark.sql("""
    select uuid() as lock_SK,
           lock.lockid,
           lock.stationLockNr,
           station.stationid,
           station.stationNr,
           station.type,
           station.zipcode,
           station.street,
           station.number,
           station.district
    from source_Lock as lock
    join source_Station as station on station.stationid = lock.stationid
""")

df_dim_lock.show()

+--------------------+------+-------------+---------+---------+-----------+-------+--------------------+------+---------+
|             lock_SK|lockid|stationLockNr|stationid|stationNr|       type|zipcode|              street|number| district|
+--------------------+------+-------------+---------+---------+-----------+-------+--------------------+------+---------+
|f8c21fbf-7de1-4b4...|   217|            1|       12|      120|ENKELZIJDIG|   2060|Schijnpoortweg (2...| 27-29|ANTWERPEN|
|9a903ae0-cb8b-4f6...|   218|            2|       12|      120|ENKELZIJDIG|   2060|Schijnpoortweg (2...| 27-29|ANTWERPEN|
|3e0a4a5f-6953-45a...|   219|            3|       12|      120|ENKELZIJDIG|   2060|Schijnpoortweg (2...| 27-29|ANTWERPEN|
|0165312e-9a8d-43e...|   220|            4|       12|      120|ENKELZIJDIG|   2060|Schijnpoortweg (2...| 27-29|ANTWERPEN|
|3644beda-249c-447...|   221|            5|       12|      120|ENKELZIJDIG|   2060|Schijnpoortweg (2...| 27-29|ANTWERPEN|
|ab23169e-bb2c-473...|  

In [5]:
# Transform
# Add a single placeholder row whose type is 'geen slot' (Dutch: "no lock")
# and whose other attributes are NULL.
# NOTE(review): presumably used for facts that are not tied to a lock - confirm.

# The temp view is still registered so SQL that refers to it keeps working.
df_dim_lock.createOrReplaceTempView("df_dim_lock")

# No placeholders in the query, so a plain string is used instead of an f-string.
geen_slot_type = spark.sql("""
    select uuid() as lock_SK,
           null as lockid,
           null as stationLockNr,
           null as stationid,
           null as stationNr,
           'geen slot' as type,
           null as zipcode,
           null as street,
           null as number,
           null as district
""")
geen_slot_type.show()

# unionByName matches columns by name, so the result no longer depends on both
# selects listing their columns in exactly the same order (union() is positional).
# The DataFrame is used directly instead of re-reading it through "SELECT *".
df_dim_lock_no_lock = df_dim_lock.unionByName(geen_slot_type)
df_dim_lock_no_lock.show()


+--------------------+------+-------------+---------+---------+---------+-------+------+------+--------+
|             lock_SK|lockid|stationLockNr|stationid|stationNr|     type|zipcode|street|number|district|
+--------------------+------+-------------+---------+---------+---------+-------+------+------+--------+
|689a5ed8-2073-4ff...|  NULL|         NULL|     NULL|     NULL|geen slot|   NULL|  NULL|  NULL|    NULL|
+--------------------+------+-------------+---------+---------+---------+-------+------+------+--------+

+--------------------+------+-------------+---------+---------+-----------+-------+--------------------+------+---------+
|             lock_SK|lockid|stationLockNr|stationid|stationNr|       type|zipcode|              street|number| district|
+--------------------+------+-------------+---------+---------+-----------+-------+--------------------+------+---------+
|f8c21fbf-7de1-4b4...|   234|           18|       12|      120|ENKELZIJDIG|   2060|Schijnpoortweg (2...| 27-

In [6]:
# Load
# Save the dimension (including the 'geen slot' row) as the Delta table
# "dimLock"; mode "overwrite" replaces whatever the table held before.
(
    df_dim_lock_no_lock.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("dimLock")
)

In [7]:
# Stop the Spark session now that the table has been written.
spark.stop()