## DATA COLLECTION

* Data is collected from [OpenWeatherMap](https://openweathermap.org/) <br>
* For the full list of their APIs, see the [API catalogue](https://openweathermap.org/api) <br>
* [API Documentation](https://openweathermap.org/api/air-pollution)
* API used - http://api.openweathermap.org/data/2.5/air_pollution/history?lat={lat}&lon={lon}&start={start}&end={end}&appid={API key}

In [None]:
import json
import os
import time
from datetime import datetime, timezone

import pandas as pd
import requests

In [None]:
# Query window, written as naive datetimes and interpreted as UTC.
# Date format: datetime(YYYY, M, D, H, m)
# The end date was extended to 4/3/23.

START_DATE = datetime(2020, 11, 25, 1, 0)
END_DATE = datetime(2023, 3, 4, 23, 0)

# Latitude / longitude sent as the API's `lat` and `lon` query parameters.
LAT = 19.07
LON = 72.88

# OpenWeatherMap API key. Set the OPENWEATHER_API_KEY environment variable to
# supply your own key without editing the notebook; the literal below is kept
# only as a backward-compatible fallback.
# NOTE(review): a key committed to source control should be rotated and removed.
APP_ID = os.environ.get('OPENWEATHER_API_KEY', 'a82bcbb38561edff907416a47e4c15f4')

# Convert the window to Unix timestamps. time.mktime() would interpret the
# datetimes in the machine's local timezone, so the requested window would
# shift depending on where the notebook runs. Pinning to UTC makes the request
# reproducible and consistent with the UTC decoding of the response's `dt`.
START_UNIX = int(START_DATE.replace(tzinfo=timezone.utc).timestamp())
END_UNIX = int(END_DATE.replace(tzinfo=timezone.utc).timestamp())

print('Start unix: {}'.format(START_UNIX))
print('End unix: {}'.format(END_UNIX))

In [None]:
# Build the request URL for the Air Pollution "history" endpoint from the
# configured coordinates, time window and API key.
# HTTPS is used so the API key in the query string is not sent in clear text.
url = (
    'https://api.openweathermap.org/data/2.5/air_pollution/history'
    '?lat={}&lon={}&start={}&end={}&appid={}'
).format(LAT, LON, START_UNIX, END_UNIX, APP_ID)

In [None]:
# Fetch the historical air-pollution data from the API.
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
# raise_for_status() surfaces HTTP errors (e.g. an invalid API key) right here,
# rather than as a confusing KeyError when the body is parsed later.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [None]:
# Uncomment the next line to inspect the raw response body
# response.text

In [None]:
# Decode the JSON text of the API response into Python objects
raw_body = response.text
r = json.loads(raw_body)

In [None]:
# Flatten every observation in the response into one row:
#   [date, AQI, co, no, no2, o3, so2, pm2_5, pm10, nh3]
# The rows are later converted to a DataFrame.
# Only the day part of the timestamp is kept, so some dates have multiple
# observations.
# The AQI value comes from the observation's 'main' section; the pollutant
# concentrations come from its 'components' section.
component_keys = ('co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3')

data = [
    [
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; an
        # aware UTC conversion yields the same 'YYYY-MM-DD' string.
        datetime.fromtimestamp(int(c['dt']), tz=timezone.utc).strftime('%Y-%m-%d'),
        c['main']['aqi'],
        *(c['components'][key] for key in component_keys),
    ]
    for c in r['list']
]

In [None]:
# Turn the parsed rows into a DataFrame; column names follow the row layout
df = pd.DataFrame(
    data=data,
    columns=[
        'date',
        'AQI',
        'co',
        'no',
        'no2',
        'o3',
        'so2',
        'pm2_5',
        'pm10',
        'nh3',
    ],
)

In [None]:
# Preview the first five rows of the freshly built DataFrame
df.head(n=5)

In [None]:
# Write the dataset to CSV, leaving out the DataFrame index column
df.to_csv(
    path_or_buf='../../data/airQuality_1.csv',
    index=False,
)

In [None]:
# Load the exported CSV back in to verify what was written
df_read = pd.read_csv(filepath_or_buffer='../../data/airQuality_1.csv')

In [None]:
# Preview the first five rows of the re-loaded dataset
df_read.head(n=5)

In [None]:
# Count the missing values in each column of the re-loaded dataset
df_read.isna().sum()