In [0]:
%sql
-- Make `agriculture` the current catalog for the rest of this notebook session.
USE CATALOG agriculture

In [0]:
# 1. Load Bronze Data
# Read the raw soil-profile table from the bronze schema through the DataFrameReader.
bronze_soil_table = "agriculture.bronze.state_soil_profiles"
df_soil_bronze = spark.read.table(bronze_soil_table)

In [0]:
from pyspark.sql.functions import col, trim, initcap, avg

# 2. Apply Transformations
# Three steps turn the bronze frame into the silver shape:
#   a) give the terse bronze columns descriptive names (N -> nitrogen_content, ...),
#   b) normalise the state name (trim whitespace, then Title Case) so it lines up
#      with the Market Data naming,
#   c) cast every measurement column to double.

# Source column -> silver column. Applied in this order; withColumnRenamed is a
# no-op for a source column that is absent from the frame.
soil_column_renames = {
    "N": "nitrogen_content",
    "P": "phosphorus_content",
    "K": "potassium_content",
    "pH": "ph_level",
    "state": "state_name",
}

soil_renamed = df_soil_bronze
for bronze_name, silver_name in soil_column_renames.items():
    soil_renamed = soil_renamed.withColumnRenamed(bronze_name, silver_name)

# Measurement columns, in the order they appear in the silver output.
soil_measurement_columns = [
    "nitrogen_content",
    "phosphorus_content",
    "potassium_content",
    "ph_level",
]

df_soil_silver = soil_renamed.select(
    # Cleaned state name keeps the same column name.
    initcap(trim(col("state_name"))).alias("state_name"),
    *[col(measurement).cast("double") for measurement in soil_measurement_columns],
)

In [0]:
# 3. Quality Check
# The silver table is meant to hold exactly one row per state. Grouping on the
# state name and averaging each measurement collapses any duplicate state rows
# into a single row; a state that already has one row keeps its values.
averaged_soil_columns = [
    "nitrogen_content",
    "phosphorus_content",
    "potassium_content",
    "ph_level",
]
df_soil_silver = df_soil_silver.groupBy("state_name").agg(
    *[avg(column_name).alias(column_name) for column_name in averaged_soil_columns]
)

In [0]:
# 4. Preview
# Render the silver DataFrame in the notebook output so the cleaned state names
# and numeric columns can be inspected visually.
display(df_soil_silver)

state_name,nitrogen_content,phosphorus_content,potassium_content,ph_level
Andhra Pradesh,78.0,45.0,22.0,6.8
Arunachal Pradesh,55.0,15.0,35.0,5.5
Assam,60.0,18.0,38.0,5.8
Bihar,85.0,30.0,25.0,7.2
Chhattisgarh,70.0,35.0,20.0,6.5
Delhi,90.0,40.0,30.0,7.5
Goa,65.0,25.0,45.0,6.2
Gujarat,75.0,38.0,28.0,7.8
Haryana,130.0,48.0,35.0,7.9
Himachal Pradesh,60.0,20.0,40.0,6.0


In [0]:
# 5. Write to Silver Table
# Persist the DataFrame as a Delta table, replacing whatever the table currently
# holds. The overwriteSchema option lets the overwrite also replace the table's
# existing schema instead of requiring it to match.
df_soil_silver.write.saveAsTable(
    "agriculture.silver.state_soil_profiles",
    format="delta",
    mode="overwrite",
    overwriteSchema="true",
)

print("Silver State Soil Profiles Table Created Successfully")

Silver State Soil Profiles Table Created Successfully
