# Basic Setup

In [None]:
# import required libraries
import pandas as pd
import sqlite3

In [None]:
# Install the ipython-sql package, which supplies the `sql` extension loaded below.
# Only needed once per environment; skip this cell if it is already installed.
%pip install ipython-sql

In [None]:
# Load the `sql` IPython extension so the %sql (line) and %%sql (cell) magics
# used in the rest of this notebook are available.
%load_ext sql

In [None]:
# Line magic (single %) that opens the connection used by the later %%sql cells.
# The URL points at the SQLite file aqiDB.db, resolved relative to the working directory.
%sql sqlite:///aqiDB.db

In [None]:
%%sql
-- Preview five rows of the table to confirm the connection and inspect its columns.
SELECT *
FROM air_quality_index
LIMIT 5;

# SQL Data Analysis for Air Quality Index

### Q1. What is the average pollutant level (pollutant_avg) for each pollutant type across all cities?

In [None]:
%%sql
-- Average of pollutant_avg for each pollutant type across the whole table.
-- Rows holding the 'NA' placeholder are excluded, because the AVG() aggregate in
-- SQLite counts a non-numeric string as 0 and would pull every mean downward.
-- The CAST keeps the arithmetic numeric even if the column is stored as text.
SELECT pollutant_id,
       AVG(CAST(pollutant_avg AS REAL)) AS average_pollutant_level
FROM air_quality_index
WHERE pollutant_avg <> 'NA'
GROUP BY pollutant_id;

### Q2. Which city has the highest average pollutant level for PM10?

In [None]:
%%sql
-- City with the highest mean PM10 reading, taken over all of its rows.
-- AVG (not MAX) is used because the question asks for the highest average.
-- The CAST forces numeric comparison, so a text value such as '99' cannot outrank '100'.
-- Rows holding the 'NA' placeholder are skipped so they do not count as 0.
SELECT city,
       AVG(CAST(pollutant_avg AS REAL)) AS average_pm10_level
FROM air_quality_index
WHERE pollutant_id = 'PM10'
  AND pollutant_avg <> 'NA'
GROUP BY city
ORDER BY average_pm10_level DESC
LIMIT 1;

### Q3. How does the average pollutant level of PM2.5 compare between different states?

In [None]:
%%sql
-- Mean PM2.5 level for each state, rounded to two decimal places.
-- Rows holding the 'NA' placeholder are excluded, because the AVG() aggregate in
-- SQLite counts a non-numeric string as 0 and would understate the state mean.
SELECT state,
       ROUND(AVG(CAST(pollutant_avg AS REAL)), 2) AS average_pm2_5_level
FROM air_quality_index
WHERE pollutant_id = 'PM2.5'
  AND pollutant_avg <> 'NA'
GROUP BY state;

### Q4. What is the trend of pollutant levels over time for a specific city?

In [None]:
%%sql
-- Readings for one example city ordered by their update time.
-- pollutant_id is included so each pollutant_avg value can be attributed to a pollutant.
-- station and pollutant_id act as tie-breakers, giving a stable order when timestamps repeat.
-- NOTE(review) the ordering is chronological only if last_update is stored in a
-- sortable format such as ISO 8601, which should be confirmed against the data.
SELECT city,
       station,
       pollutant_id,
       last_update,
       pollutant_avg
FROM air_quality_index
WHERE city = 'Amaravati'
ORDER BY last_update, station, pollutant_id;

Considering city as Amravati. The results obtained are not useful as update time is same for all the data.

### Q5. Which stations have recorded the highest maximum pollutant levels (pollutant_max) for each pollutant?

In [None]:
%%sql
-- Highest recorded pollutant_max for every station and pollutant pair, largest first.
-- The CAST makes MAX and ORDER BY compare numbers, so that a text value such as
-- '99' cannot outrank '100' if the column is stored as text.
-- Rows holding the 'NA' placeholder are skipped.
SELECT station,
       pollutant_id,
       MAX(CAST(pollutant_max AS REAL)) AS max_pollutant_level
FROM air_quality_index
WHERE pollutant_max <> 'NA'
GROUP BY station, pollutant_id
ORDER BY max_pollutant_level DESC;

### Q6. Which city has the highest average pollutant level for PM10 and PM2.5 combined?

In [None]:
%%sql
-- City with the highest mean reading over its PM10 and PM2.5 rows taken together.
-- Rows holding the 'NA' placeholder are excluded, because the AVG() aggregate in
-- SQLite counts a non-numeric string as 0 and would distort the ranking.
SELECT city,
       AVG(CAST(pollutant_avg AS REAL)) AS average_pollutant_level
FROM air_quality_index
WHERE pollutant_id IN ('PM10', 'PM2.5')
  AND pollutant_avg <> 'NA'
GROUP BY city
ORDER BY average_pollutant_level DESC
LIMIT 1;

### Q7. Are there any significant differences in pollutant levels between urban and rural stations?

In [None]:
%%sql
-- Mean pollutant_avg for each station over all pollutants, highest first.
-- Rows holding the 'NA' placeholder are excluded so they are not averaged in as 0.
-- NOTE(review) no urban or rural attribute is referenced here, so this only ranks
-- stations and classifying them would need an extra column or lookup table.
SELECT station,
       AVG(CAST(pollutant_avg AS REAL)) AS average_pollutant_level
FROM air_quality_index
WHERE pollutant_avg <> 'NA'
GROUP BY station
ORDER BY average_pollutant_level DESC;

### Q8. Which pollutants have shown the most variation in their levels (difference between pollutant_max and pollutant_min) across different stations?

In [None]:
%%sql
-- Mean spread between pollutant_max and pollutant_min for each pollutant, widest first.
-- A row is used only when both bounds are real readings, since an 'NA' placeholder
-- would be treated as 0 in the subtraction and produce a misleading variation.
SELECT pollutant_id,
       AVG(CAST(pollutant_max AS REAL) - CAST(pollutant_min AS REAL)) AS average_variation
FROM air_quality_index
WHERE pollutant_max <> 'NA'
  AND pollutant_min <> 'NA'
GROUP BY pollutant_id
ORDER BY average_variation DESC;

### Q9. How do pollutant levels vary by geographical coordinates (latitude and longitude)?

In [None]:
%%sql
-- Mean pollutant_avg for every distinct latitude and longitude pair.
-- Rows holding the 'NA' placeholder are excluded, because the AVG() aggregate in
-- SQLite counts a non-numeric string as 0 and would lower the mean for that location.
SELECT latitude,
       longitude,
       AVG(CAST(pollutant_avg AS REAL)) AS average_pollutant_level
FROM air_quality_index
WHERE pollutant_avg <> 'NA'
GROUP BY latitude, longitude;

### Q.10 What are the top five stations with the highest average pollutant levels for PM10 and PM2.5?

In [None]:
%%sql
-- Five station and pollutant pairs with the highest mean PM10 or PM2.5 level.
-- Rows holding the 'NA' placeholder are excluded, because the AVG() aggregate in
-- SQLite counts a non-numeric string as 0 and would distort the ranking.
SELECT station,
       pollutant_id,
       AVG(CAST(pollutant_avg AS REAL)) AS average_pollutant_level
FROM air_quality_index
WHERE pollutant_id IN ('PM10', 'PM2.5')
  AND pollutant_avg <> 'NA'
GROUP BY station, pollutant_id
ORDER BY average_pollutant_level DESC
LIMIT 5;