In [7]:
import ConnectionConfig as cc

# Prepare the local environment first, then launch the Spark session used by
# every later cell.
cc.setupEnvironment()

app_name = "DIM_WEATHER"
# Second positional argument of startLocalCluster; presumably a core or
# partition count — TODO confirm against ConnectionConfig.
cluster_arg = 4
spark = cc.startLocalCluster(app_name, cluster_arg)

# Final expression of the cell: whatever getActiveSession() returns is shown
# as the cell output.
spark.getActiveSession()

In [8]:
# Load the weather lookup CSV: the first row supplies the column names and
# Spark infers the column types from the data.
weather_csv_path = "./FileStore/tables/weather.csv"
df_weather = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(weather_csv_path)
)
df_weather.show()

+----------+------------+---------+------------+
|weather_id| temperature|condition|weather_type|
+----------+------------+---------+------------+
|         1|        >=15|    Sunny|    pleasant|
|         2|         <15|    Rainy|  unpleasant|
|         3|<15 and >-10|   Cloudy|     natural|
|         4|         any|  unknown|     unknown|
+----------+------------+---------+------------+



In [9]:
# Transform: give each source column its dimension-table name.
# The mapping is applied in insertion order, one rename at a time.
column_renames = {
    "weather_id": "weatherSK",
    "temperature": "temperature_condition",
    "condition": "weather_condition",
    "weather_type": "weather_category",
}
dimWeather = df_weather
for source_name, target_name in column_renames.items():
    dimWeather = dimWeather.withColumnRenamed(source_name, target_name)

# Persist the renamed DataFrame in Delta format at a fixed relative path,
# replacing whatever was stored there before.
weather_delta_path = "spark-warehouse/dimWeather"
delta_writer = dimWeather.write.format("delta").mode("overwrite")
delta_writer.save(weather_delta_path)

# Print the renamed DataFrame.
dimWeather.show()

+---------+---------------------+-----------------+----------------+
|weatherSK|temperature_condition|weather_condition|weather_category|
+---------+---------------------+-----------------+----------------+
|        1|                 >=15|            Sunny|        pleasant|
|        2|                  <15|            Rainy|      unpleasant|
|        3|         <15 and >-10|           Cloudy|         natural|
|        4|                  any|          unknown|         unknown|
+---------+---------------------+-----------------+----------------+



In [10]:
# Register the dimension as a temporary view named "dimWeather" so it can be
# queried with SQL (any existing view of that name is replaced).
dimWeather.createOrReplaceTempView("dimWeather")

# Read the view back through SQL and print its rows.
weather_rows = spark.sql("SELECT * FROM dimWeather")
weather_rows.show()


+---------+---------------------+-----------------+----------------+
|weatherSK|temperature_condition|weather_condition|weather_category|
+---------+---------------------+-----------------+----------------+
|        1|                 >=15|            Sunny|        pleasant|
|        2|                  <15|            Rainy|      unpleasant|
|        3|         <15 and >-10|           Cloudy|         natural|
|        4|                  any|          unknown|         unknown|
+---------+---------------------+-----------------+----------------+



In [11]:
# Save the dimension as a catalog table named "dimWeather" in Delta format,
# overwriting any existing table contents.
# NOTE(review): a temp view with the same name is registered earlier in this
# notebook — confirm which one later SQL queries are expected to resolve to.
table_writer = dimWeather.write.format("delta").mode("overwrite")
table_writer.saveAsTable("dimWeather")


In [12]:
# Stop the Spark session created at the top of the notebook; cells that use
# `spark` cannot be re-run after this without starting a new session.
spark.stop()