# 🌍 AQI Forecasting & Early Warning Dashboard

**Real-time Air Quality Intelligence for Indian Cities**

---

In [0]:
-- KPI 1: Overall Average AQI
SELECT 
  ROUND(AVG(aqi), 1) as overall_avg_aqi,
  'Overall Average AQI' as metric
FROM aqi_india.silver.aqi_cleaned;

overall_avg_aqi,metric
124.8,Overall Average AQI


Databricks visualization. Run in Databricks to view.

In [0]:
-- KPI 2: Total Cities Monitored
SELECT 
  COUNT(DISTINCT city) as total_cities,
  'Cities Monitored' as metric
FROM aqi_india.silver.aqi_cleaned;

total_cities,metric
275,Cities Monitored


Databricks visualization. Run in Databricks to view.

In [0]:
-- KPI 3: High Pollution Days
SELECT 
  SUM(CASE WHEN aqi > 200 THEN 1 ELSE 0 END) as high_pollution_days,
  'High Pollution Days' as metric
FROM aqi_india.silver.aqi_cleaned;

high_pollution_days,metric
50854,High Pollution Days


Databricks visualization. Run in Databricks to view.

In [0]:
-- KPI 4: Data Quality Score
SELECT 
  ROUND(COUNT(CASE WHEN aqi IS NOT NULL THEN 1 END) * 100.0 / COUNT(*), 1) as data_quality,
  'Data Quality %' as metric
FROM aqi_india.silver.aqi_cleaned;

data_quality,metric
100.0,Data Quality %


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 2: AQI Distribution by Category
-- Insight: Shows distribution of air quality levels
-- ============================================================================
SELECT 
  aqi_risk_level as category,
  COUNT(*) as days,
  ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 1) as percentage
FROM aqi_india.silver.aqi_cleaned
GROUP BY aqi_risk_level
ORDER BY 
  CASE aqi_risk_level
    WHEN 'Low' THEN 1
    WHEN 'Medium' THEN 2
    WHEN 'High' THEN 3
    WHEN 'Very High' THEN 4
    WHEN 'Severe' THEN 5
  END;

category,days,percentage
Satisfactory,104064,35.0
Poor,34378,11.6
Very Poor,13962,4.7
Moderate,95004,32.0
Good,47287,15.9
Severe,2514,0.8


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 3: Seasonal Pollution Patterns
-- Insight: Winter shows highest pollution
-- ============================================================================

SELECT 
  season,
  ROUND(AVG(aqi), 1) as avg_aqi,
  ROUND(MIN(aqi), 1) as min_aqi,
  ROUND(MAX(aqi), 1) as max_aqi,
  COUNT(*) as total_days
FROM aqi_india.silver.aqi_cleaned
GROUP BY season
ORDER BY avg_aqi DESC;

season,avg_aqi,min_aqi,max_aqi,total_days
Winter,169.3,8.0,500.0,72657
Autumn,154.4,9.0,500.0,54412
Spring,123.2,9.0,500.0,71137
Monsoon,77.1,3.0,500.0,99003


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 4: Monthly Trend Analysis
-- Insight: Clear monthly patterns for forecasting
-- ============================================================================

SELECT 
  month,
  CASE month
    WHEN 1 THEN 'Jan'
    WHEN 2 THEN 'Feb'
    WHEN 3 THEN 'Mar'
    WHEN 4 THEN 'Apr'
    WHEN 5 THEN 'May'
    WHEN 6 THEN 'Jun'
    WHEN 7 THEN 'Jul'
    WHEN 8 THEN 'Aug'
    WHEN 9 THEN 'Sep'
    WHEN 10 THEN 'Oct'
    WHEN 11 THEN 'Nov'
    WHEN 12 THEN 'Dec'
  END as month_name,
  ROUND(AVG(aqi), 1) as avg_aqi,
  ROUND(STDDEV(aqi), 1) as std_aqi
FROM aqi_india.silver.aqi_cleaned
GROUP BY month
ORDER BY month;

month,month_name,avg_aqi,std_aqi
1,Jan,175.3,100.5
2,Feb,151.2,78.2
3,Mar,129.9,66.8
4,Apr,124.8,70.2
5,May,114.9,66.7
6,Jun,98.4,62.2
7,Jul,69.1,42.6
8,Aug,69.5,37.5
9,Sep,72.7,41.2
10,Oct,127.5,79.5


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 5: Top 15 Most Polluted Cities
-- Insight: Identifies cities needing urgent intervention

SELECT 
  city,
  ROUND(avg_aqi, 1) as avg_aqi,
  high_pollution_days,
  total_records
FROM aqi_india.gold.city_summary
ORDER BY avg_aqi DESC
LIMIT 15;

city,avg_aqi,high_pollution_days,total_records
Jharsuguda,282.0,1,1
Byrnihat,248.7,191,281
Begusarai,248.1,214,357
Angul,238.1,43,62
Ghaziabad,221.6,1273,2418
Delhi,217.1,1648,3143
Siwan,216.2,283,587
Bhiwadi,215.9,1122,2063
Barrackpore,215.2,18,27
Chhapra,215.0,354,674


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 6: Year-over-Year Trend
-- Insight: Is pollution improving or worsening?

SELECT 
  year,
  ROUND(AVG(aqi), 1) as avg_aqi,
  ROUND(MIN(aqi), 1) as min_aqi,
  ROUND(MAX(aqi), 1) as max_aqi,
  COUNT(*) as measurements,
  SUM(CASE WHEN is_high_pollution THEN 1 ELSE 0 END) as high_pollution_days
FROM aqi_india.silver.aqi_cleaned
GROUP BY year
ORDER BY year;

year,avg_aqi,min_aqi,max_aqi,measurements,high_pollution_days
2015,158.6,15.0,500.0,2795,799
2016,159.7,14.0,500.0,7221,2040
2017,153.6,18.0,500.0,11855,3093
2018,147.3,17.0,500.0,21237,4881
2019,136.6,10.0,500.0,33322,6727
2020,113.8,6.0,500.0,39837,5569
2021,120.8,9.0,500.0,47285,8145
2022,121.8,8.0,500.0,56766,9513
2023,114.9,3.0,500.0,76891,10087


In [0]:
-- ============================================================================
-- VISUALIZATION 7: Prominent Pollutants Distribution
-- Insight: Which pollutants are most common?

SELECT 
  prominent_pollutant,
  COUNT(*) as occurrences,
  ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 1) as percentage
FROM aqi_india.silver.aqi_cleaned
WHERE prominent_pollutant IS NOT NULL
GROUP BY prominent_pollutant
ORDER BY occurrences DESC;

prominent_pollutant,occurrences,percentage
PM10,125707,42.3
PM2.5,90602,30.5
CO,18239,6.1
O3,18234,6.1
"PM10, PM2.5",13021,4.4
NO2,5134,1.7
OZONE,4032,1.4
SO2,3666,1.2
"O3, PM10",3086,1.0
"CO, PM10",2465,0.8


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 8: Delhi Time Series (Last 2 Years)
-- Insight: Actual AQI vs 7-day moving average

SELECT 
  date,
  aqi as actual_aqi,
  aqi_rolling_avg_7 as rolling_avg_7d,
  200 as threshold_high
FROM aqi_india.gold.aqi_ml_features
WHERE city = 'Delhi'
  AND date >= DATE_SUB((SELECT MAX(date) FROM aqi_india.gold.aqi_ml_features), 730)
ORDER BY date;

date,actual_aqi,rolling_avg_7d,threshold_high
2021-12-31,321.0,336.0,200
2022-01-01,362.0,326.1428571428572,200
2022-01-02,404.0,318.2857142857143,200
2022-01-03,387.0,333.1428571428572,200
2022-01-04,378.0,343.57142857142856,200
2022-01-05,397.0,362.1428571428572,200
2022-01-06,258.0,358.1428571428572,200
2022-01-07,182.0,338.2857142857143,200
2022-01-08,91.0,299.57142857142856,200
2022-01-09,69.0,251.7142857142857,200


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 9: Weekend vs Weekday Pollution
-- Insight: Does pollution differ on weekends?

SELECT 
  CASE WHEN is_weekend THEN 'Weekend' ELSE 'Weekday' END as day_type,
  ROUND(AVG(aqi), 1) as avg_aqi,
  COUNT(*) as days
FROM aqi_india.silver.aqi_cleaned
GROUP BY is_weekend
ORDER BY avg_aqi DESC;

day_type,avg_aqi,days
Weekday,125.2,212454
Weekend,123.9,84755


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 10: City Comparison (Top 10)
-- Insight: Compare trends across major cities

WITH top_cities AS (
  SELECT city
  FROM aqi_india.gold.city_summary
  ORDER BY avg_aqi DESC
  LIMIT 10
)
SELECT 
  c.year,
  c.month,
  c.city,
  ROUND(AVG(c.aqi), 1) as avg_aqi
FROM aqi_india.silver.aqi_cleaned c
INNER JOIN top_cities t ON c.city = t.city
WHERE c.year >= 2020
GROUP BY c.year, c.month, c.city
ORDER BY c.year, c.month, c.city;

year,month,city,avg_aqi
2020,1,Bhiwadi,248.8
2020,1,Delhi,285.7
2020,1,Ghaziabad,313.5
2020,2,Bhiwadi,250.0
2020,2,Delhi,241.2
2020,2,Ghaziabad,264.3
2020,3,Bhiwadi,151.3
2020,3,Delhi,128.2
2020,3,Ghaziabad,141.5
2020,4,Bhiwadi,94.4


Databricks visualization. Run in Databricks to view.

In [0]:
-- ============================================================================
-- VISUALIZATION 12: Monthly Data
-- Insight: Identify pollution patterns by month and year

SELECT 
  year,
  month,
  ROUND(AVG(aqi), 1) as avg_aqi
FROM aqi_india.silver.aqi_cleaned
GROUP BY year, month
ORDER BY year, month;

year,month,avg_aqi
2015,5,146.8
2015,6,113.7
2015,7,87.0
2015,8,86.1
2015,9,112.6
2015,10,181.8
2015,11,246.8
2015,12,249.4
2016,1,257.6
2016,2,193.9
