In [None]:
# create_file to update_file_content

In [1]:
# station list version - updated stationIDs

import pandas as pd
import requests
import json
from geojson import Point, Feature, FeatureCollection, dump
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
from github import Github

# Request a URL and return the response body parsed as JSON
# (on any failure the caught exception object is returned instead of being raised).
# Requires the requests & json packages.
def getJSONR(url):
    """Fetch *url* with an HTTP GET and return its body decoded as JSON.

    The function never raises: if the request, the status check or the JSON
    decoding fails, the caught exception object itself is returned, so
    callers must check the type/shape of the result before using it.
    """
    headers = {'user-agent': 'Chrome/71.0.3578.98'}
    try:
        response = requests.get(url, headers=headers, timeout=30)
        # Turn HTTP error statuses into an exception
        response.raise_for_status()
        # Decode the body with the encoding detected from its content
        response.encoding = response.apparent_encoding
        return json.loads(response.text)
    except Exception as err:
        return err

# functions to get air quality data from the JSON response of World's Air Pollution API
def getID(js):
    """Return ``js['data']['idx']``, or -1 when that field cannot be read."""
    try:
        return js['data']['idx']
    except Exception:
        # Missing key, or js is not a mapping at all (e.g. a failed request)
        return -1
    
def getSTATUS(js):
    """Return the top-level ``status`` field of *js*, or -1 if it is unavailable."""
    try:
        return js['status']
    except Exception:
        # Missing key, or js is not a mapping at all (e.g. a failed request)
        return -1

def getNAME(js):
    """Return ``js['data']['city']['name']``, or -1 when that field cannot be read."""
    try:
        return js['data']['city']['name']
    except Exception:
        # Missing key, or js is not a mapping at all (e.g. a failed request)
        return -1
    
def getGEO(js):
    """Return the first two entries of ``js['data']['city']['geo']`` as a list.

    The result is ``[lat, lng]`` in the order the response provides them.
    If the field is missing or has fewer than two entries, ``[-1, -1]`` is
    returned instead.
    """
    try:
        coords = js['data']['city']['geo']
        return [coords[0], coords[1]]
    except Exception:
        return [-1, -1]
    
def getTIME(js):
    """Return ``js['data']['time']['s']``, or -1 when that field cannot be read."""
    try:
        return js['data']['time']['s']
    except Exception:
        # Missing key, or js is not a mapping at all (e.g. a failed request)
        return -1
    
def getAQI(js):
    """Return ``js['data']['aqi']``, or -1 when that field cannot be read."""
    try:
        return js['data']['aqi']
    except Exception:
        # Missing key, or js is not a mapping at all (e.g. a failed request)
        return -1

def getPM10(js):
    """Return ``js['data']['iaqi']['pm10']['v']``, or -1 when it cannot be read."""
    try:
        return js['data']['iaqi']['pm10']['v']
    except Exception:
        # The pm10 entry is absent, or js is not a usable response
        return -1

def getPM25(js):
    """Return ``js['data']['iaqi']['pm25']['v']``, or -1 when it cannot be read."""
    try:
        return js['data']['iaqi']['pm25']['v']
    except Exception:
        # The pm25 entry is absent, or js is not a usable response
        return -1

def getNO2(js):
    """Return ``js['data']['iaqi']['no2']['v']``, or -1 when it cannot be read."""
    try:
        return js['data']['iaqi']['no2']['v']
    except Exception:
        # The no2 entry is absent, or js is not a usable response
        return -1

def getO3(js):
    """Return ``js['data']['iaqi']['o3']['v']``, or -1 when it cannot be read."""
    try:
        return js['data']['iaqi']['o3']['v']
    except Exception:
        # The o3 entry is absent, or js is not a usable response
        return -1
    
def getSO2(js):
    """Return ``js['data']['iaqi']['so2']['v']``, or -1 when it cannot be read."""
    try:
        return js['data']['iaqi']['so2']['v']
    except Exception:
        # The so2 entry is absent, or js is not a usable response
        return -1

def getCO(js):
    """Return ``js['data']['iaqi']['co']['v']``, or -1 when it cannot be read."""
    try:
        return js['data']['iaqi']['co']['v']
    except Exception:
        # The co entry is absent, or js is not a usable response
        return -1

# Station list: the 'id' column of the project's stationIDs.csv, as a plain Python list
stationIDs = pd.read_csv(
    'https://raw.githubusercontent.com/CASA-DV-Group3/AirQuality-0/lyu/assets/data/stationIDs.csv'
)['id'].tolist()

# Running total of how many times aqiAPI_Git has been called
count = 0

def _fetch_station_json(url, max_attempts=5, retry_delay=2):
    """Request *url* until the response carries a string ``status`` field.

    getJSONR returns an exception object (not a dict) when the request fails,
    in which case getSTATUS yields -1; such responses are retried.

    Returns the decoded response, or None when no usable response arrived
    within *max_attempts* tries (*retry_delay* seconds apart).
    """
    import time

    for attempt in range(max_attempts):
        js = getJSONR(url)
        if isinstance(getSTATUS(js), str):
            return js
        if attempt < max_attempts - 1:
            # Back off briefly instead of hammering the API in a tight loop
            time.sleep(retry_delay)
    return None


def _snapshot_filename(now):
    """Build 'STATIONdata<day><HH>.geojson' (day unpadded, hour zero-padded) from *now*."""
    return 'STATIONdata' + str(now.day) + '{:02d}'.format(now.hour) + '.geojson'


def aqiAPI_Git():
    """Collect the current readings of every station and publish them as GeoJSON.

    For each id in the module-level ``stationIDs`` the WAQI feed is queried
    and turned into one GeoJSON Feature. The resulting FeatureCollection is
    written to a local 'STATIONdata<day><HH>.geojson' file and committed to
    'assets/data/' on the "lyu" branch of CASA-DV-Group3/AirQuality-0.

    Stations that raise an error or yield no valid response after a few
    retries are reported on stdout and skipped; the run itself continues.
    The module-level ``count`` is incremented on every call. Returns None.
    """
    global count
    import os

    from github import UnknownObjectException

    count = count + 1

    # SECURITY: both tokens are committed in this source and should be
    # considered leaked. Set these environment variables to use fresh
    # credentials; the literals remain only as a backward-compatible fallback.
    waqi_token = os.environ.get(
        'WAQI_API_TOKEN', 'c4103eeb230f7e9cd437f3ea4a2fe03216563895')
    github_token = os.environ.get(
        'AIRQUALITY_GITHUB_TOKEN', 'dde59fd520aa7102e518a9b0993e9bca2f8122a2')

    # initialise a features list for the geojson object
    features = []
    for station_id in stationIDs:
        try:
            url = ('https://api.waqi.info/feed/'
                   + '@' + str(station_id)
                   + '/?token=' + waqi_token)
            js = _fetch_station_json(url)
            if js is None:
                print('station ' + str(station_id)
                      + ' - no valid response, skipped')
                continue
            lat, lng = getGEO(js)
            # NOTE(review): GeoJSON positions are (longitude, latitude); this
            # writes (lat, lng). Kept as-is because existing consumers of the
            # published files may rely on the current order - confirm before
            # swapping.
            features.append(Feature(geometry=Point((lat, lng)),
                                    properties={"id": station_id,
                                                "station": getNAME(js),
                                                "time": getTIME(js),
                                                "aqi": getAQI(js),
                                                "pm10": getPM10(js),
                                                "pm25": getPM25(js),
                                                "no2": getNO2(js),
                                                "o3": getO3(js),
                                                "so2": getSO2(js),
                                                "co": getCO(js)}))
        except Exception as e:
            # Best effort: one bad station must not abort the whole snapshot
            print(e)
            continue
    APIgeojson = FeatureCollection(features)

    # Sample the clock once so the day and hour in the name always agree
    now = datetime.now()
    filename = _snapshot_filename(now)
    with open(filename, 'w') as f:
        dump(APIgeojson, f)

    # upload to Github
    g = Github(github_token)
    repo = g.get_repo("CASA-DV-Group3/AirQuality-0")
    path = 'assets/data/' + filename
    message = "24h station data on " + str(now)
    content = json.dumps(APIgeojson)
    try:
        existing = repo.get_contents(path, ref="lyu")
    except UnknownObjectException:
        # No file with this name yet: create it
        repo.create_file(path, message, content, branch="lyu")
    else:
        # The day/hour name repeats every month: overwrite the old snapshot
        repo.update_file(path, message, content, existing.sha, branch="lyu")
    print(count)
    print(filename + ' - ' + 'success')
    print(datetime.now())
    return

# Four interval jobs, each repeating every 4 hours and started one hour after
# the previous one, so that together they trigger aqiAPI_Git once per hour.
scheduler = BackgroundScheduler()
for _start, _end in (
    ('2019-05-19 22:15:00', '2019-05-21 01:00:00'),
    ('2019-05-19 23:15:00', '2019-05-21 02:00:00'),
    ('2019-05-20 00:15:00', '2019-05-21 03:00:00'),
    ('2019-05-20 01:15:00', '2019-05-21 04:00:00'),
):
    scheduler.add_job(aqiAPI_Git, 'interval', hours=4,
                      start_date=_start, end_date=_end)
scheduler.start()

3
STATIONdata2000.geojson - success
2019-05-20 00:55:45.986017
4
STATIONdata2001.geojson - success
2019-05-20 01:56:46.845407
5
STATIONdata2002.geojson - success
2019-05-20 02:55:37.171330
6
STATIONdata2003.geojson - success
2019-05-20 03:56:53.783620
7
STATIONdata2004.geojson - success
2019-05-20 04:56:40.940794
8
STATIONdata2005.geojson - success
2019-05-20 05:57:05.527108
9
STATIONdata2006.geojson - success
2019-05-20 06:55:50.886747
10
STATIONdata2007.geojson - success
2019-05-20 07:55:46.763419
11
STATIONdata2008.geojson - success
2019-05-20 08:55:44.061173
12
STATIONdata2009.geojson - success
2019-05-20 09:56:12.279695
13
STATIONdata2010.geojson - success
2019-05-20 10:56:05.339207


In [None]:
# # city list version

# import pandas as pd
# import requests
# import json
# from geojson import Point, Feature, FeatureCollection, dump
# from apscheduler.schedulers.background import BackgroundScheduler
# from datetime import datetime

# # function to request through API and get response as a JSON file
# # require requests & json package
# def getJSONR(url): 
#     try:
#         ua = {'user-agent':'Chrome/71.0.3578.98'}
#         r = requests.get(url, headers = ua, timeout=30)
#         r.raise_for_status() # raise error if r.status_code != 200
#         r.encoding = r.apparent_encoding
#         js = json.loads(r.text)
#         return js
#     except Exception as e:
#         return e

# # functions to get air quality data from the JSON response of World's Air Pollution API
# def getGEO(js):
#     try:
#         geo = []
#         lat = js['data']['city']['geo'][0]
#         lng = js['data']['city']['geo'][1]
#         geo.append(lat)
#         geo.append(lng)
#         return geo
#     except Exception as e:
#         geo = [-1, -1]
#         return geo
    
# def getTIME(js):
#     try:
#         time = js['data']['time']['s']
#         return time
#     except Exception as e:
#         return "error"
    
# def getAQI(js):
#     try:
#         aqi = js['data']['aqi']
#         return aqi
#     except Exception as e:
#         return -1

# def getPM10(js):
#     try:
#         pm10 = js['data']['iaqi']['pm10']['v']
#         return pm10
#     except Exception as e:
#         return -1

# def getPM25(js):
#     try:
#         pm25 = js['data']['iaqi']['pm25']['v']
#         return pm25
#     except Exception as e:
#         return -1

# def getNO2(js):
#     try:
#         no2 = js['data']['iaqi']['no2']['v']
#         return no2
#     except Exception as e:
#         return -1

# def getO3(js):
#     try:
#         o3 = js['data']['iaqi']['o3']['v']
#         return o3
#     except Exception as e:
#         return -1
    
# def getSO2(js):
#     try:
#         so2 = js['data']['iaqi']['so2']['v']
#         return so2
#     except Exception as e:
#         return -1

# def getCO(js):
#     try:
#         co = js['data']['iaqi']['co']['v']
#         return co
#     except Exception as e:
#         return -1

# # city list
# cityNAMEs = pd.read_csv('https://raw.githubusercontent.com/CASA-DV-Group3/AirQuality-0/lyu/assets/data/cityNAMEs.csv')
# cityNAMEs = cityNAMEs['city'].tolist()

# # Initialise a variable to count the number of using the function
# count = 0

# def API():
#     global count
#     count = count + 1
#     # initialise a features list for the geojson object
#     features = []
#     for i in range(0,len(cityNAMEs)):
#         try:
#             url = 'https://api.waqi.info/feed/'\
#             +cityNAMEs[i]\
#             +'/?token=c4103eeb230f7e9cd437f3ea4a2fe03216563895'
#             js = getJSONR(url)
#             features.append(Feature(geometry=Point((getGEO(js)[0], getGEO(js)[1])), 
#                             properties={"city": cityNAMEs[i],
#                                         "time": getTIME(js),
#                                         "aqi": getAQI(js),
#                                         "pm10": getPM10(js),
#                                         "pm25": getPM25(js),
#                                         "no2": getNO2(js),
#                                         "o3": getO3(js),
#                                         "so2": getSO2(js),
#                                         "co": getCO(js)}))
#         except Exception as e:
#             continue  
#     APIgeojson = FeatureCollection(features)
#     if datetime.now().hour < 10:
#         filename = 'APIdata'+str(datetime.now().day)+'0'+str(datetime.now().hour)+'.geojson'
#     else:
#         filename = 'APIdata'+str(datetime.now().day)+str(datetime.now().hour)+'.geojson'
#     with open(filename, 'w') as f:
#        dump(APIgeojson, f)
#     print(count)
#     print('APIdata'+str(datetime.now().day)+str(datetime.now().hour)+'.geojson'+' - '+'success')
#     print(datetime.now())
#     return

# scheduler = BackgroundScheduler()
# scheduler.add_job(API,'interval', hours=1, start_date='2019-05-15 10:45:00', end_date='2019-05-15 09:00:00')
# scheduler.start()