## DATA COLLECTION

### Openweathermap

* Using [Openweathermap](https://openweathermap.org/) for data collection <br>
* For their API list visit - [apis](https://openweathermap.org/api) <br>
* [API Documentation](https://openweathermap.org/api/air-pollution)
* API used - `http://api.openweathermap.org/data/2.5/air_pollution/history?lat={lat}&lon={lon}&start={start}&end={end}&appid={API key}`

In [None]:
import requests
import pandas as pd
import json
from datetime import datetime
import time
import os.path

In [None]:
# Detect whether this notebook is running inside Google Colab.
# Only a failed import means "not Colab"; any other exception should surface
# instead of being swallowed by a bare `except:`.
try:
    import google.colab  # noqa: F401 -- imported only to probe availability
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True
IN_COLAB

In [None]:
# Name of the JSON file that holds the API credentials.
cred_json = "credentials.json"

In [None]:
# Load the API credentials from the JSON file named by `cred_json`.
if IN_COLAB:
    # In Colab the file is obtained through the upload widget; the uploaded
    # content is looked up by file name and wrapped in a file-like object.
    from google.colab import files
    import io

    uploaded = files.upload()
    credentials = json.load(io.BytesIO(uploaded[cred_json]))
else:
    # The context manager closes the file even if json.load raises.
    with open(cred_json, encoding="utf-8") as file:
        credentials = json.load(file)

In [None]:
# owm_aq => Open Weather Map Air Quality
# Location of the CSV that accumulates the downloaded observations.
if IN_COLAB:
    file_path = "owm_aq.csv"
else:
    # Built with os.path.join so the separator matches the host OS; the
    # previous backslash literal relied on invalid escape sequences and only
    # resolved to ../../data on Windows.
    file_path = os.path.join("..", "..", "data", "owm_aq.csv")

# Enter your APP ID here or fetch it from the credentials file
APP_ID = credentials['OPENWEATHERMAP_API']

In [None]:
# Coordinates of the location to query.
LAT = 19.07
LON = 72.88

# Query window. datetime arguments are (year, month, day, hour, minute).
# START_DATE = datetime(2020, 11, 25, 1, 0)
START_DATE = datetime(2015, 1, 1, 1, 0)
# END_DATE = datetime(2023, 3, 4, 23, 0)
END_DATE = datetime.now()

# time.mktime interprets the naive datetimes in the machine's local timezone.
START_UNIX = int(time.mktime(START_DATE.timetuple()))
END_UNIX = int(time.mktime(END_DATE.timetuple()))

# If a previous export exists, resume one second after its latest observation
# instead of downloading the whole history again.
if os.path.isfile(file_path):
    df = pd.read_csv(file_path)
    ts = pd.Timestamp(df.date.max())
    # A header-only CSV yields NaT; keep the default START_UNIX in that case.
    if not pd.isna(ts):
        # Stored dates are naive local times (rows are built with
        # datetime.fromtimestamp), so convert back with time.mktime.
        # Timestamp.timestamp() would treat the naive value as UTC and shift
        # the resume point by the local UTC offset.
        ts_int2 = int(time.mktime(ts.timetuple())) + 1
        START_UNIX = ts_int2

In [None]:
# Show the epoch-second bounds that will be sent to the API, one per line.
print(
    'Start unix: {}'.format(START_UNIX),
    'End unix: {}'.format(END_UNIX),
    sep='\n',
)

In [None]:
# Historical air-pollution endpoint for the chosen location and time window.
# HTTPS is used so the API key in the query string is not sent in clear text.
url = (
    'https://api.openweathermap.org/data/2.5/air_pollution/history'
    '?lat={}&lon={}&start={}&end={}&appid={}'
).format(LAT, LON, START_UNIX, END_UNIX, APP_ID)

In [None]:
# Fetch the data from the API.
# The timeout stops the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
# Fail here with the HTTP status (e.g. invalid key, bad range) rather than
# later with a KeyError on the missing 'list' field.
response.raise_for_status()

In [None]:
#response text
# response.text

In [None]:
# Decode the JSON text of the API response into Python objects.
response_body = response.text
r = json.loads(response_body)

In [None]:
# Number of records under the response's 'list' key.
len(r['list'])

In [None]:
# Flatten every API record into one row:
#   [timestamp, AQI, co, no, no2, o3, so2, pm2_5, pm10, nh3]
# The full timestamp (not just the day) is kept because some dates have
# multiple observations. datetime.fromtimestamp yields naive local time.
POLLUTANTS = ['co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3']
data = [
    [
        datetime.fromtimestamp(int(c['dt'])),
        c['main']['aqi'],
        *(c['components'][name] for name in POLLUTANTS),
    ]
    for c in r['list']
]
    

In [None]:
# Combine the previously exported rows (if the CSV exists) with the rows
# fetched in this run into a single dataframe `df`.
COLUMNS = ['date', 'AQI', 'co', 'no', 'no2', 'o3', 'so2', 'pm2_5', 'pm10', 'nh3']

frames = []
if os.path.isfile(file_path):
    frames.append(pd.read_csv(file_path))
if data:
    # Convert the list of rows to a dataframe
    frames.append(pd.DataFrame(data, columns=COLUMNS))

# pd.concat rejects an empty list with an opaque message; explain the cause.
if not frames:
    raise ValueError(
        'No existing export at {!r} and the API returned no rows; '
        'nothing to combine.'.format(file_path)
    )

# ignore_index avoids duplicate row labels across the old and new frames.
df = pd.concat(frames, ignore_index=True)

In [None]:
# Write the combined dataframe to `file_path` without the index column.
# owm_aq => Open Weather Map Air Quality
df.to_csv(path_or_buf=file_path, index=False)

In [None]:
# Preview the first rows of the dataframe that was just exported.
df.head()

In [None]:
# Reload the dataset from the CSV file that was just written.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Preview the first rows of the reloaded dataframe.
df.head()

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# Smallest and largest `date` values, plus the dataframe's (rows, columns) shape.
df.date.min(), df.date.max(), df.shape

In [None]:
# Same summary as the previous cell: min/max `date` and the dataframe shape.
df.date.min(), df.date.max(), df.shape

In [None]:
# Turn the latest `date` value into a pandas Timestamp, then print its epoch
# seconds and the same value advanced by one second.
ts = pd.Timestamp(df['date'].max())
epoch_seconds = ts.timestamp()
ts_int, ts_int2 = int(epoch_seconds), int(epoch_seconds + 1)
print(ts_int, ' ', ts_int2)