In [0]:
%sql
USE CATALOG agriculture

In [0]:
from pyspark.sql.functions import avg, min, max, count, stddev, col, round, expr

# 1. Read the Silver Table
# Using the exact name from your screenshot
df_silver = spark.read.table("agriculture.silver.crop_production")

# 2. Aggregate to get State-Level Stats
# We group by State and Crop to create a unique profile for each pair
df_gold = df_silver.groupBy("state", "crop").agg(
    # How much do they usually get?
    round(avg("calculated_yield"), 2).alias("avg_yield"),
    
    # Risk factor: How much does the yield jump around? (Standard Deviation)
    # COALESCE(stddev, 0) handles cases where there's only 1 year of data (returns null otherwise)
    round(expr("coalesce(stddev(calculated_yield), 0)"), 2).alias("yield_volatility"),
    
    # Validation: When was it last seen? 
    # If the last record is 1998, maybe we shouldn't recommend it in 2025.
    max("year").alias("last_cultivated_year"),
    
    # Reliability: How many years of data do we have?
    count("year").alias("years_of_history")
)

# 3. Filter for "Active" Crops (Optional but Recommended)
# Let's keep only crops that have been grown reasonably recently (e.g., after 2010)
# and have at least 3 years of history to be statistically significant.
df_gold_filtered = df_gold.filter(
    (col("last_cultivated_year") >= 2010) & 
    (col("years_of_history") >= 3)
)

# 4. Save to Gold
df_gold_filtered.write.format("delta").mode("overwrite").saveAsTable("agriculture.gold.regional_yield_stats")

print(f"Gold Table Created. Row Count: {df_gold_filtered.count()}")
display(df_gold_filtered.orderBy("state", "crop"))

Gold Table Created. Row Count: 161


state,crop,avg_yield,yield_volatility,last_cultivated_year,years_of_history
Andaman and Nicobar Islands,banana,8.9,5.46,2010,16
Andaman and Nicobar Islands,coconut,3796.02,519.22,2010,18
Andaman and Nicobar Islands,maize,3.11,1.84,2010,3
Andaman and Nicobar Islands,rice,2.9,0.9,2010,17
Andhra Pradesh,banana,26.14,15.55,2013,156
Andhra Pradesh,chickpea,1.44,0.57,2014,207
Andhra Pradesh,coconut,10689.47,4729.02,2014,169
Andhra Pradesh,cotton,2.1,0.98,2014,245
Andhra Pradesh,grapes,20.59,4.32,2014,17
Andhra Pradesh,maize,4.83,2.35,2014,456


In [0]:
%sql
-- Find the most established crops in Punjab
SELECT 
    crop, 
    avg_yield, 
    years_of_history, 
    last_cultivated_year
FROM agriculture.gold.regional_yield_stats
WHERE state LIKE '%Punjab%'
ORDER BY years_of_history DESC
LIMIT 5

crop,avg_yield,years_of_history,last_cultivated_year
rice,3.71,338,2014
maize,3.19,219,2014
chickpea,0.98,211,2014
cotton,3.0,186,2014
lentil,0.7,119,2014
