In [0]:
%sql
-- Make `agriculture` the current catalog for this notebook session.
USE CATALOG agriculture

In [0]:
# 1. Load Bronze Data
# Read the raw weather history table through the DataFrame reader.
bronze_weather_table = "agriculture.bronze.weather_history"
df_weather_bronze = spark.read.table(bronze_weather_table)

In [0]:
from pyspark.sql.functions import col, trim, initcap

# 2. Apply Transformations
# The Silver frame is built from the Bronze frame in four steps:
#   a) rename Bronze columns to their Silver names,
#   b) standardize state_name (trim, then Title Case via initcap) -- intended
#      to match the Market & Soil tables,
#   c) cast year to integer and the three weather metrics to double,
#   d) keep only the Silver columns, in a fixed order.

# Bronze column name -> Silver column name (applied in this order).
silver_names_by_bronze_name = {
    "state": "state_name",
    "avg_temp_c": "avg_temperature",
    "total_rainfall_mm": "total_rainfall",
    "avg_humidity_percent": "avg_humidity",
}

# Silver column name -> Spark SQL type it is cast to (applied in this order).
silver_types_by_name = {
    "year": "integer",
    "avg_temperature": "double",
    "total_rainfall": "double",
    "avg_humidity": "double",
}

# Final column order of the Silver table.
silver_column_order = [
    "state_name",
    "year",
    "avg_temperature",
    "total_rainfall",
    "avg_humidity",
]

# a) Rename. Always start from the Bronze frame so re-running the cell is safe.
weather_renamed = df_weather_bronze
for bronze_name, silver_name in silver_names_by_bronze_name.items():
    weather_renamed = weather_renamed.withColumnRenamed(bronze_name, silver_name)

# b) Standardize the state name: strip surrounding whitespace, then Title Case.
weather_standardized = weather_renamed.withColumn(
    "state_name", initcap(trim(col("state_name")))
)

# c) Cast each typed column in place (same name, new type).
weather_typed = weather_standardized
for silver_name, spark_type in silver_types_by_name.items():
    weather_typed = weather_typed.withColumn(
        silver_name, col(silver_name).cast(spark_type)
    )

# d) Project down to the Silver schema.
df_weather_silver = weather_typed.select(*silver_column_order)

In [0]:
# 3. Preview Data
# Count the rows first (this triggers a Spark job), then report and render the frame.
weather_record_count = df_weather_silver.count()
print(f"Total Weather Records: {weather_record_count}")
display(df_weather_silver)

Total Weather Records: 720


state_name,year,avg_temperature,total_rainfall,avg_humidity
Andhra Pradesh,1997,28.21,1191.08,69.56
Andhra Pradesh,1998,28.21,1100.41,71.95
Andhra Pradesh,1999,28.03,603.67,66.91
Andhra Pradesh,2000,27.74,1070.25,70.73
Andhra Pradesh,2001,28.08,910.13,68.69
Andhra Pradesh,2002,28.54,768.22,66.52
Andhra Pradesh,2003,28.31,857.23,68.83
Andhra Pradesh,2004,27.72,759.1,69.79
Andhra Pradesh,2005,27.95,1192.26,71.1
Andhra Pradesh,2006,27.65,1343.62,71.34


In [0]:
# 4. Write to Silver Table
# Save the Silver frame as a Delta table, replacing any existing data and
# allowing the stored schema to be overwritten by this frame's schema.
silver_weather_table = "agriculture.silver.weather_history"

silver_weather_writer = (
    df_weather_silver.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
silver_weather_writer.saveAsTable(silver_weather_table)

print("Silver Weather History Table Created Successfully")

Silver Weather History Table Created Successfully
