In [0]:
# -------------------------------
# 1️⃣ Variables
# -------------------------------
import os

storage_account = "sbbapistorageaccount"
container = "data-container"

# The storage key must never be committed with the notebook. It is read from
# the ADLS_ACCOUNT_KEY environment variable and defaults to an empty string.
# NOTE(review): on Databricks a secret scope (dbutils.secrets.get) is the
# usual source for this value — switch to it if one is available.
account_key = os.environ.get("ADLS_ACCOUNT_KEY", "")

# Spark configuration (only needed if not already set in your cluster).
# Skipped when no key is supplied, so an empty value is never written over
# whatever credentials the cluster already provides.
if account_key:
    spark.conf.set(
        f"fs.azure.account.key.{storage_account}.dfs.core.windows.net",
        account_key,
    )

# ADLS paths — every location lives under the same container root.
adls_root = f"abfss://{container}@{storage_account}.dfs.core.windows.net"

weather_csv_path = f"{adls_root}/data/weather-data.csv"
weather_params_csv_path = f"{adls_root}/data/weather-data-parameters.csv"

bronze_weather_path = f"{adls_root}/bronze/weather"

In [0]:
# -------------------------------
# 2️⃣ Read raw CSV files
# -------------------------------
# Both source files are read the same way: the first row is the header and
# Spark infers the column types from the data.
csv_read_options = {"header": "true", "inferSchema": "true"}

df_weather = spark.read.options(**csv_read_options).csv(weather_csv_path)
df_params = spark.read.options(**csv_read_options).csv(weather_params_csv_path)

print("✅ CSV files successfully loaded")

# Preview the first rows of each frame without truncating wide values.
for loaded_frame in (df_weather, df_params):
    loaded_frame.show(5, truncate=False)

In [0]:
# -------------------------------
# 3️⃣ Check schema
# -------------------------------
# Print the inferred schema of the weather data first, then the parameters.
for inspected_frame in (df_weather, df_params):
    inspected_frame.printSchema()

In [0]:
# -------------------------------
# 4️⃣ Clean column names (lightly) & write Bronze Delta tables
# -------------------------------
# Character-level rewrite rules applied by clean_column. Delta rejects the
# characters " ,;{}()\n\t=" in column names, so each of them is either turned
# into an underscore (separators) or dropped (brackets). Hyphens are mapped to
# underscores as well, for consistency with the other separators.
_COLUMN_NAME_TRANSLATION = str.maketrans({
    " ": "_",
    ",": "_",
    ";": "_",
    "\t": "_",
    "\n": "_",
    "=": "_",
    "-": "_",
    "(": None,
    ")": None,
    "{": None,
    "}": None,
})


def clean_column(colname):
    """Normalise a raw column header into a Delta-compatible name.

    The header is stripped of leading/trailing whitespace, lower-cased, and
    then passed through ``_COLUMN_NAME_TRANSLATION``: spaces, commas,
    semicolons, tabs, newlines, ``=`` and ``-`` become underscores, while
    parentheses and curly braces are removed.

    Args:
        colname: Column name as a string.

    Returns:
        The cleaned column name.
    """
    return colname.strip().lower().translate(_COLUMN_NAME_TRANSLATION)

# Rename every column of both frames through clean_column.
weather_column_names = [clean_column(raw_name) for raw_name in df_weather.columns]
params_column_names = [clean_column(raw_name) for raw_name in df_params.columns]

df_weather = df_weather.toDF(*weather_column_names)
df_params = df_params.toDF(*params_column_names)

# Write to Bronze Delta: each frame overwrites its own sub-folder of the
# Bronze weather location.
bronze_outputs = (
    (df_weather, "/data"),
    (df_params, "/params"),
)
for bronze_frame, bronze_subdir in bronze_outputs:
    bronze_frame.write.format("delta").mode("overwrite").save(bronze_weather_path + bronze_subdir)

print("✅ Weather (data & params) Bronze tables saved successfully")

In [0]:
# -------------------------------
# 5️⃣ Register SQL tables
# -------------------------------
# Table name -> sub-folder of the Bronze weather location holding its Delta files.
bronze_table_folders = {
    "bronze_weather_data": "data",
    "bronze_weather_parameters": "params",
}

# Register each Delta location as a table; existing tables are left untouched.
for bronze_table, bronze_folder in bronze_table_folders.items():
    spark.sql(f"""
CREATE TABLE IF NOT EXISTS {bronze_table}
USING DELTA
LOCATION '{bronze_weather_path}/{bronze_folder}'
""")

print("✅ SQL tables bronze_weather_data and bronze_weather_parameters successfully created")


In [0]:
# Sanity check: show three rows of the registered weather table, untruncated.
preview_query = """
SELECT *
FROM bronze_weather_data
LIMIT 3
"""
spark.sql(preview_query).show(truncate=False)