In [0]:
from pyspark.sql import functions as F

# Fully qualified table names: the silver table is read below; the gold
# table name is defined here for the cell that persists the result.
silver_table = "workspace.air_quality.silver_air_quality"
gold_table = "workspace.air_quality.gold_daily_aqi"

# Grouping keys: one output row per (city_code, state_code, date).
daily_keys = ["city_code", "state_code", "date"]

# Aggregates computed per group: mean (and max for particulates) of each
# measurement column, plus the number of source rows in the group.
daily_metrics = [
    F.avg("pm25_ugm3").alias("avg_pm25"),
    F.max("pm25_ugm3").alias("max_pm25"),
    F.avg("pm10_ugm3").alias("avg_pm10"),
    F.max("pm10_ugm3").alias("max_pm10"),
    F.avg("no2_ugm3").alias("avg_no2"),
    F.avg("ozone_ugm3").alias("avg_ozone"),
    F.avg("temp_c").alias("avg_temp"),
    F.avg("relative_humidity_pct").alias("avg_rh"),
    F.avg("wind_speed_ms").alias("avg_wind_speed"),
    F.count("*").alias("measurements"),
]

gold_daily = spark.table(silver_table).groupBy(*daily_keys).agg(*daily_metrics)

# Preview a handful of rows to sanity-check the aggregation.
display(gold_daily.limit(10))

city_code,state_code,date,avg_pm25,max_pm25,avg_pm10,max_pm10,avg_no2,avg_ozone,avg_temp,avg_rh,avg_wind_speed,measurements
HR001,HR,2013-03-12,,,23.2,23.2,70.82,0.0,745.65,0.14,0.0,1
HR001,HR,2013-09-25,,,10.21,10.21,12.53,0.0,756.1,0.1,0.0,1
HR001,HR,2014-07-12,,,6.49,6.49,13.78,0.0,724.52,-0.13,0.0,1
HR001,HR,2016-02-19,213.0,213.0,26.51,26.51,55.36,3.0,737.6,-0.14,0.0,1
HR001,HR,2016-04-08,75.0,75.0,13.3,13.3,15.83,1.02,732.0,-0.08,0.0,1
HR002,HR,2022-09-15,,,,,,,,,,1
HR003,HR,2017-05-29,,,,,,,,,,1
HR003,HR,2019-06-25,20.84,20.84,,,10.93,0.69,63.75,1.46,188.94,1
HR004,HR,2016-04-10,,,,,,,,,,1
HR004,HR,2021-04-04,45.04,45.04,,,76.25,4.12,29.54,1.31,161.15,1


In [0]:
def calculate_aqi(pm25):
    """Build a Spark Column expression that maps a PM2.5 value to an AQI score.

    The mapping is piecewise linear over the PM2.5 bands (0-30, 30-60,
    60-90, 90-120, 120-250); any value above 250 is assigned 500.

    A null PM2.5 yields a null AQI. Without the explicit null guard every
    ``pm25 <= N`` comparison evaluates to NULL, no ``when`` branch matches,
    and the row would fall through to ``otherwise(500)`` -- reporting the
    worst possible AQI for days that simply have no PM2.5 data.

    NOTE(review): the breakpoints, and the jump from 300 (at 250) to 500
    (above 250), are kept exactly as they were; confirm them against the
    intended AQI standard.

    Args:
        pm25: A Spark Column holding the PM2.5 value.

    Returns:
        A Column aliased "aqi".
    """
    return (
        # Missing input must stay missing rather than hit the catch-all below.
        F.when(pm25.isNull(), F.lit(None).cast("double"))
        .when(pm25 <= 30, pm25 * 50 / 30)
        .when(pm25 <= 60, 50 + (pm25 - 30) * 50 / 30)
        .when(pm25 <= 90, 100 + (pm25 - 60) * 50 / 30)
        .when(pm25 <= 120, 150 + (pm25 - 90) * 50 / 30)
        .when(pm25 <= 250, 200 + (pm25 - 120) * 100 / 130)
        .otherwise(500)
        .alias("aqi")
    )

# Add an "aqi" column computed from each row's mean PM2.5.
gold_final = gold_daily.withColumn("aqi", calculate_aqi(F.col("avg_pm25")))

# Configure the write (Delta format, overwrite mode, partitioned by
# state_code), then save it as the gold table.
gold_writer = gold_final.write.format("delta").mode("overwrite").partitionBy("state_code")
gold_writer.saveAsTable(gold_table)

print("Gold table ready!")

Gold table ready!
