# PROJECT : REAL-TIME AQI ANALYSIS AND VISUALIZATION OF INDIA

# - IMPORTING NECESSARY LIBRARIES :

In [1]:
import pandas as pd
import folium
from folium.plugins import HeatMap

# ###-STEP 1 DOWNLOAD DATA

In [2]:
# Details of API at:- https://aqicn.org/api/
base_url = "https://api.waqi.info"

# Got a special User token from:- https://aqicn.org/data-platform/token/#/ by registering myself on this site.

In [3]:
tok = "0976e0617860ee99e9fbbfb1e3f84c9e44fd4fa2" 

# (lat, long)-> bottom left, (lat, lon)-> top right

# Location of India is 8N 61E to 37N, 97E approx

In [4]:
latlngbox = "8.0000,61.0000,37.0000,97.0000" # For India

In [5]:
trail_url=f"/map/bounds/?latlng={latlngbox}&token={tok}" 

# Joining the parts of URL: 

In [6]:
my_data = pd.read_json(base_url + trail_url)

# Printing 2 cols ‘status’ and ‘data’

In [7]:
print('columns->', my_data.columns)

columns-> Index(['status', 'data'], dtype='object')


# ###-STEP 2:- Create table like DataFrame

In [8]:
all_rows = []
for each_row in my_data['data']:
    all_rows.append([each_row['station']['name'],each_row['lat'],each_row['lon'],each_row['aqi']])
    df = pd.DataFrame(all_rows,
    columns=['station_name', 'lat', 'lon', 'aqi'])

# ### -STEP 3:- Cleaning the DataFrame# 

# Converting Invalid parse to NaN

In [9]:
df['aqi'] = pd.to_numeric(df.aqi,
errors='coerce')

# Printing Values with NaN : 

In [10]:
print('with NaN->', df.shape) 

with NaN-> (207, 4)


#  Remove NaN (Not a Number) entries in column:

In [11]:
df1 = df.dropna(subset = ['aqi'])

# Printing Values Without NaN:

In [12]:
print('without NaN->', df1.shape)

without NaN-> (199, 4)


# ###-STEP 4:- Making folium heat map

In [13]:
df2 = df1[['lat', 'lon', 'aqi']]

# To Print Our DataFrame:

In [14]:
print(df2.head)

<bound method NDFrame.head of            lat        lon    aqi
0    29.800600  76.415500  357.0
1    19.110740  72.860840  179.0
2    22.969611  72.643500  160.0
3    28.376058  77.315741  289.0
4    28.422700  77.066700  211.0
..         ...        ...    ...
202  30.719859  76.738637  201.0
203  27.573483  84.498578  148.0
204  27.422675  85.034416   55.0
205  24.261301  80.723178   98.0
206  23.233584  77.400574  155.0

[199 rows x 3 columns]>


# Giving Central Location:

In [15]:
init_loc = [23, 77] # Approx over Bhopal

# Getting and Printing Max_Aqi of Locations:

In [16]:
max_aqi = int(df1['aqi'].max())
print('max_aqi->', max_aqi)

max_aqi-> 562


# Visualization Of Live_HeatMap of India:

In [17]:
m = folium.Map(location = init_loc, zoom_start = 5)
 
heat_aqi = HeatMap(df2, min_opacity = 0.1, max_val = max_aqi,
radius = 20, blur = 20, max_zoom = 2)
m.add_child(heat_aqi)
m # Show the map

  heat_aqi = HeatMap(df2, min_opacity = 0.1, max_val = max_aqi,


# ###-STEP 5 : Ploting stations on map

In [18]:
centre_point = [23.25, 77.41] # Approx over Bhopal
m2 = folium.Map(location = centre_point,
tiles = 'Stamen Terrain',
zoom_start= 6)
for idx, row in df1.iterrows():
    lat = row['lat']
    lon = row['lon']
    station = row['station_name'] + ' AQI=' + str(row['aqi'])
    station_aqi = row['aqi']
    if station_aqi > 300: ## Red for very bad AQI
        pop_color = 'red'
    elif station_aqi > 200:
        pop_color = 'orange' ## Orange for moderate AQI
    else:
        pop_color = 'green' ## Green for good AQI
    folium.Marker(location= [lat, lon],
    popup = station,
    icon = folium.Icon(color = pop_color)).add_to(m2)
m2 # Display map

In [19]:
import requests
import json
import pandas as pd
import re
import datetime
import time
import base64
from itertools import product

stationsData = pd.read_csv("C:/Users/hp/OneDrive/Desktop/PROJECT 3rd Sem/station.csv")

def getData(api, filters):
    url1 = "https://api.data.gov.in/resource/3b01bcb8-0b14-4abf-b6f2-c1bfd384ba69?api-key=" + api + "&format=json&limit=500"
    criteriaAll = [[(k, re.sub(r'\s+', '%20', v)) for v in criteria[k]] for k in criteria]
    url2 = [url1 + ''.join(f'&filters[{ls}]={value}' for ls, value in p) for p in product(*criteriaAll)]
    
    pollutionDfAll = pd.DataFrame()
    for i in url2:
        response = requests.get(i, verify=True)
        response_dict = json.loads(response.text)
        pollutionDf = pd.DataFrame(response_dict['records'])
        pollutionDfAll = pd.concat([pollutionDfAll, pollutionDf])
    
    return pollutionDfAll



api ="579b464db66ec23bdd000001fcbeb272b328454e41d1cd46d77298ba"


# In the code below, there are two arguments that we needs to input - API Key Filter criteria. Filter criteria can have "state", "city", "station", "pollutant_id". To see the unique values of state, city and station, you can download and refer the dataset shown above. Distinct values of pollutant_id are as follows -"PM2.5" "PM10"  "NO2"   "NH3"   "SO2"   "CO"    "OZONE"

In [20]:
criteria = {'city':["Rupnagar","Punjab"], 'pollutant_id': ["PM10", "PM2.5","NO2","NH3","SO2","CO","OZONE"]}
mydata = getData(api, criteria)
mydata

Unnamed: 0,id,country,state,city,station,last_update,pollutant_id,pollutant_min,pollutant_max,pollutant_avg,pollutant_unit
0,1406,India,Punjab,Rupnagar,"Ratanpura, Rupnagar - Ambuja Cements",10-11-2022 09:00:00,PM10,99,162,131,
0,1405,India,Punjab,Rupnagar,"Ratanpura, Rupnagar - Ambuja Cements",10-11-2022 09:00:00,PM2.5,58,308,156,
0,1407,India,Punjab,Rupnagar,"Ratanpura, Rupnagar - Ambuja Cements",10-11-2022 09:00:00,NO2,11,29,18,
0,1408,India,Punjab,Rupnagar,"Ratanpura, Rupnagar - Ambuja Cements",10-11-2022 09:00:00,SO2,5,9,7,
0,1409,India,Punjab,Rupnagar,"Ratanpura, Rupnagar - Ambuja Cements",10-11-2022 09:00:00,CO,26,84,44,


# To find AQI score of station(s) which is the most granular level of information. We can club it with the pollutant ID to narrow down Our search result.

In [21]:
criteria = {"station":["Anand Vihar, Delhi - DPCC", "Okhla Phase-2, Delhi - DPCC"], "pollutant_id":["PM10"]}
mydata = getData(api, criteria)
mydata

Unnamed: 0,id,country,state,city,station,last_update,pollutant_id,pollutant_min,pollutant_max,pollutant_avg,pollutant_unit
0,303,India,Delhi,Delhi,"Anand Vihar, Delhi - DPCC",10-11-2022 09:00:00,PM10,131,500,329,
0,449,India,Delhi,Delhi,"Okhla Phase-2, Delhi - DPCC",10-11-2022 09:00:00,PM10,133,441,274,


In [22]:
criteria={"station": ["Sector 22, Chandigarh - CPCC"],'pollutant_id': ["PM10", "PM2.5","NO2","NH3","SO2","CO","OZONE"]}
mydata = getData(api, criteria)
mydata

Unnamed: 0,id,country,state,city,station,last_update,pollutant_id,pollutant_min,pollutant_max,pollutant_avg,pollutant_unit
0,270,India,Chandigarh,Chandigarh,"Sector 22, Chandigarh - CPCC",10-11-2022 09:00:00,PM10,121,392,217,
0,269,India,Chandigarh,Chandigarh,"Sector 22, Chandigarh - CPCC",10-11-2022 09:00:00,PM2.5,187,401,323,
0,271,India,Chandigarh,Chandigarh,"Sector 22, Chandigarh - CPCC",10-11-2022 09:00:00,NO2,33,190,79,
0,272,India,Chandigarh,Chandigarh,"Sector 22, Chandigarh - CPCC",10-11-2022 09:00:00,NH3,1,11,7,
0,273,India,Chandigarh,Chandigarh,"Sector 22, Chandigarh - CPCC",10-11-2022 09:00:00,SO2,9,70,22,
0,274,India,Chandigarh,Chandigarh,"Sector 22, Chandigarh - CPCC",10-11-2022 09:00:00,CO,20,129,31,
0,275,India,Chandigarh,Chandigarh,"Sector 22, Chandigarh - CPCC",10-11-2022 09:00:00,OZONE,11,268,39,
