# Airnow Highest 5 Uploader

Uploads data for Airnow's highest 5 AQI locations to ESDR.

Reports to stat.createlab.org as `Airnow Highest Five - Uploader`.

Airnow's docs for the highest 5 are here: https://airnow.gov/index.cfm?action=airnow.news_item&newsitemid=103

In [197]:
import json, os, dateutil, re, requests, subprocess, datetime, glob, stat, urllib.parse

from dateutil import rrule, tz, parser
from sqlitedict import SqliteDict

In [198]:
# Boilerplate to load utils.ipynb
# See https://github.com/CMU-CREATE-Lab/python-utils/blob/master/utils.ipynb


def exec_ipynb(filename_or_url):
    """Execute every code cell of a Jupyter notebook in this module's globals.

    The notebook is fetched with ``requests`` when the argument starts with
    ``http:`` or ``https:``; otherwise it is read from the local path.  The
    source of all code cells is joined, written to a uniquely named ``.py``
    file under ``/tmp``, compiled with that file name, and exec'd.

    Args:
        filename_or_url: Local path or http(s) URL of the ``.ipynb`` file.

    Raises:
        Nothing is caught here: errors from loading, JSON decoding, compiling
        or running the notebook code propagate to the caller.
    """
    if re.match(r'https?:', filename_or_url):
        nb = requests.get(filename_or_url).json()
    else:
        # Use a context manager so the notebook file is closed deterministically
        with open(filename_or_url) as nb_file:
            nb = json.load(nb_file)

    # Notebook format 4+ keeps cells at the top level under 'source'; older
    # formats nest them in the first worksheet under 'input'.
    if nb['nbformat'] >= 4:
        cells, source_key = nb['cells'], 'source'
    else:
        cells, source_key = nb['worksheets'][0]['cells'], 'input'
    src = '\n\n\n'.join(''.join(cell[source_key]) for cell in cells if cell['cell_type'] == 'code')

    # The timestamp and pid keep the name unique per invocation.
    tmpname = '/tmp/%s-%s-%d.py' % (os.path.basename(filename_or_url),
                                    datetime.datetime.now().strftime('%Y%m%d%H%M%S%f'),
                                    os.getpid())
    # Presumably the copy on disk lets tracebacks show the source lines.
    with open(tmpname, 'w') as tmp_file:
        tmp_file.write(src)
    code = compile(src, tmpname, 'exec')
    exec(code, globals())


# Run the helper notebooks, in this order.  exec_ipynb executes their code cells
# into this notebook's globals, so the names they define become usable below.
# NOTE(review): presumably these supply Stat, Esdr, AirnowCommon and
# sleep_until_next_period, which are used later but not defined here — verify.
exec_ipynb('./python-utils/utils.ipynb')
exec_ipynb('./python-utils/esdr-library.ipynb')
exec_ipynb('./airnow-common.ipynb')

In [199]:
# Identity passed to Stat: the service name is registered via Stat.set_service,
# while host and shortname accompany every Stat.debug/info/warning/up call.
STAT_SERVICE_NAME = 'Airnow Highest Five - Uploader'
STAT_HOSTNAME = 'hal21'
STAT_SHORTNAME = 'airnow-highest-five-uploader'

# process_all() flushes the accumulators to ESDR once this many files have been read
NUM_FILES_PER_UPLOAD_BATCH = 500

# Period passed to sleep_until_next_period; also scales the validity of the Stat.up report
RUN_INTERVAL_SECONDS = 60 * 5   # every 5 minutes

# This file stores the Geocoding API key named 'airnow-highest-5-uploader.ipynb (hal21)', defined under
# the 'Hal21 Cocalc Notebooks' project in the lab admin Google account at https://console.developers.google.com/
GOOGLE_API_KEYS_JSON = './google-api-keys.json'

# Load the Google API key
google_api_keys = {}
with open(GOOGLE_API_KEYS_JSON, 'r') as f:
    google_api_keys = json.load(f)

# The JSON file must contain a 'geocoding' entry; a missing key raises KeyError here
GEOCODING_API_KEY = google_api_keys['geocoding']
# Base request URL; callers append the URL-quoted address
GEOCODING_API_URL = 'https://maps.googleapis.com/maps/api/geocode/json?key='+GEOCODING_API_KEY+'&address='

In [200]:
# Tell Stat which service name to report under.
# NOTE(review): Stat is not defined in this file; presumably it comes from utils.ipynb — verify.
Stat.set_service(STAT_SERVICE_NAME)

In [237]:
# Persistent map of data file path -> file mtime at the time it was uploaded.
# Written by upload_accumulated() and read by is_unmodified() to skip unchanged files.
# autocommit=True makes each assignment persist without an explicit commit() call.
uploaded_file_timestamps_db = SqliteDict(AirnowCommon.HIGHEST_FIVE_AQI_DIRECTORY + '/uploaded_file_timestamps.db', autocommit=True)

In [238]:
# Accumulators filled by process_dat_file() and drained by upload_accumulated():
#   accumulated_cities:          city id   -> list of [timestamp, rank, aqi]
#   accumulated_rankings:        rank      -> list of [timestamp, city id, aqi]
#   accumulated_file_timestamps: file path -> mtime read when the file was processed
accumulated_cities, accumulated_rankings, accumulated_file_timestamps = {}, {}, {}

def clear_accumulated():
    """Rebind all three accumulators to fresh, empty dicts."""
    global accumulated_cities, accumulated_rankings, accumulated_file_timestamps
    accumulated_cities, accumulated_rankings, accumulated_file_timestamps = {}, {}, {}

# Record format example: 1583863202.348545:1,235,111|2,809,93|3,789,91|4,946,86|5,230,81
# A colon separates the Unix timestamp from the rankings.  Rankings are pipe delimited and there should exist 5 per timestamp.
# A ranking item consists of three comma-delimited values: the rank index [1-5], the Airnow city ID, and the AQI
def process_dat_file(src):
    """Parse one daily ``.dat`` file and add its records to the accumulators.

    Each line has the form ``<unix timestamp>:<rank>,<city id>,<aqi>|...``.
    For every ranking on a line, ``[timestamp, rank, aqi]`` is appended to
    ``accumulated_cities[city_id]`` and ``[timestamp, city_id, aqi]`` to
    ``accumulated_rankings[rank]``.  A line is committed only after all of
    its rankings parsed, so a malformed line contributes nothing.

    When the file has been read, its mtime (sampled before reading) is stored
    in ``accumulated_file_timestamps[src]``.

    Args:
        src: Path of a data file whose base name is ``YYYYMMDD.dat``.

    Raises:
        ValueError: If the base name of ``src`` is not ``YYYYMMDD.dat``.
        OSError: If the file cannot be stat'ed or opened.
        Exception: If more than 5 lines of the file fail to parse.
    """
    # Sampled before the contents are read; this is the value that later gets
    # compared against the file's current mtime in is_unmodified().
    src_epoch_timestamp = os.path.getmtime(src)
    # Only validates the file name; the parsed date itself is not used.
    datetime.datetime.strptime(os.path.basename(src), '%Y%m%d.dat')
    Stat.debug('Processing file %s' % src, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    num_records_read = 0
    error_count = 0
    with open(src, 'r') as records:
        for lineno, record in enumerate(records, start=1):
            try:
                (timestamp, rankings) = record.split(':')
                timestamp = float(timestamp)
                parsed_rankings = []
                for ranking in rankings.split('|'):
                    (rank, city_id, aqi) = map(int, ranking.split(','))
                    parsed_rankings.append((rank, city_id, aqi))
            except ValueError:
                # Wrong number of fields and non-numeric values both raise ValueError
                Stat.warning('Failed to parse line %d of %s. Skipping.' % (lineno, src), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
                error_count += 1
                continue

            # The whole line parsed: now it is safe to touch the accumulators
            for (rank, city_id, aqi) in parsed_rankings:
                accumulated_cities.setdefault(city_id, []).append([timestamp, rank, aqi])
                accumulated_rankings.setdefault(rank, []).append([timestamp, city_id, aqi])
            num_records_read += 1

    if error_count > 5:
        raise Exception('Too many parse errors (%d) reading %s, aborting' % (error_count, src))

    if error_count > 0:
        Stat.warning('Read %d records from %s (%d error(s))' % (num_records_read, src, error_count), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    else:
        Stat.debug('Read %d records from %s (%d error(s))' % (num_records_read, src, error_count), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    accumulated_file_timestamps[src] = src_epoch_timestamp

# process_dat_file('../../airnow-data/highest-five-aqi/dat/20200310.dat')
# print(json.dumps(accumulated_rankings, sort_keys=True, indent=3))
# print(json.dumps(accumulated_cities, sort_keys=True, indent=3))

In [239]:
# Contents of airnow_city_id_to_city_info.json, loaded on first use
cities_cached = None
def get_city_info(city_id):
    """Return the city info entry for an Airnow city id, or None if unknown.

    The lookup table is read once from ``airnow_city_id_to_city_info.json``
    in ``AirnowCommon.HIGHEST_FIVE_AQI_DIRECTORY`` and kept in the module
    global ``cities_cached``.  Keys in the JSON are strings, so ``city_id``
    may be given as an int or a str.

    Args:
        city_id: Airnow city id (int or str).

    Returns:
        The entry stored under ``str(city_id)``, e.g.
        ``{"city": "Pittsburgh", "state": "PA"}``, or ``None`` when absent.
    """
    global cities_cached
    # Test against None so that an empty table is not re-read on every call
    if cities_cached is None:
        with open(AirnowCommon.HIGHEST_FIVE_AQI_DIRECTORY + '/airnow_city_id_to_city_info.json', 'r') as f:
            cities_cached = json.load(f)

    try:
        return cities_cached[str(city_id)]
    except (KeyError, TypeError):
        # Unknown id, or the JSON root is not a mapping
        return None

# print(json.dumps(get_city_info(164), sort_keys=True, indent=3))  # {"city": "Pittsburgh", "state": "PA"}
# print(json.dumps(get_city_info('164'), sort_keys=True, indent=3))  # {"city": "Pittsburgh", "state": "PA"}
# print(json.dumps(get_city_info('-1'), sort_keys=True, indent=3))  # null

In [240]:
# Lazily initialised singletons shared by the ESDR helper functions below.
# Each stays None until the first function that needs it creates/looks it up.
esdr = None  # Esdr client, built from 'esdr-auth-airnow-uploader.json'
highest_five_city_esdr_product = None  # ESDR product 'airnow_aqi_highest_five_city'
highest_five_ranking_esdr_product = None  # ESDR product 'airnow_aqi_highest_five_ranking'

In [241]:
def get_highest_five_city_esdr_product():
    """Return the ESDR product named 'airnow_aqi_highest_five_city'.

    Creates the shared ``esdr`` client if it does not exist yet, and looks the
    product up only while the cached module global is still empty.
    """
    global esdr, highest_five_city_esdr_product
    esdr = esdr or Esdr('esdr-auth-airnow-uploader.json', user_agent='esdr-library.py['+STAT_SERVICE_NAME+']')
    highest_five_city_esdr_product = (
        highest_five_city_esdr_product
        or esdr.get_product_by_name('airnow_aqi_highest_five_city')
    )
    return highest_five_city_esdr_product

def get_highest_five_ranking_esdr_product():
    """Return the ESDR product named 'airnow_aqi_highest_five_ranking'.

    Creates the shared ``esdr`` client if it does not exist yet, and looks the
    product up only while the cached module global is still empty.
    """
    global esdr, highest_five_ranking_esdr_product
    esdr = esdr or Esdr('esdr-auth-airnow-uploader.json', user_agent='esdr-library.py['+STAT_SERVICE_NAME+']')
    highest_five_ranking_esdr_product = (
        highest_five_ranking_esdr_product
        or esdr.get_product_by_name('airnow_aqi_highest_five_ranking')
    )
    return highest_five_ranking_esdr_product

# print(json.dumps(get_highest_five_city_esdr_product(), sort_keys=True, indent=3))  # null
# print(json.dumps(get_highest_five_ranking_esdr_product(), sort_keys=True, indent=3))  # null

In [242]:
# docs at https://developers.google.com/maps/documentation/geocoding/intro
def get_lat_lon_for_address(city, state, timeout=30):
    """Geocode "<city>, <state>" with the Google Geocoding API.

    This is best-effort: every failure is reported through ``Stat.warning``
    and results in ``None`` rather than an exception.

    Args:
        city: City name.
        state: State abbreviation or name.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the uploader indefinitely.

    Returns:
        ``{"lat": <latitude>, "lon": <longitude>}`` taken from the first
        result, or ``None`` if the request failed, returned a non-2xx status,
        or contained no usable location.
    """
    address = "%s, %s" % (city, state)
    try:
        response = requests.get(GEOCODING_API_URL + urllib.parse.quote(address), timeout=timeout)
    except Exception:
        # Connection errors, timeouts, etc.  (requests.get does not raise
        # HTTPError by itself; the status code is checked below instead.)
        Stat.warning("Failed to geocode address [%s]" % (address), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return None

    if not (200 <= response.status_code < 300):
        Stat.warning("Failed to geocode address [%s] (HTTP %d)" % (address, response.status_code), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return None

    try:
        location = response.json()['results'][0]['geometry']['location']
        lat = location['lat']
        lon = location['lng']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body is not JSON, 'results' is empty, or the expected keys are missing
        lat = lon = None

    # Compare against None: 0.0 is a legitimate coordinate
    if lat is None or lon is None:
        Stat.warning("Failed to get geocode results for address [%s]" % (address), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return None

    return {
        "lat" : lat,
        "lon" : lon
    }

# get_lat_lon_for_address('Pittsburgh','PA')   # {'lat': 40.44062479999999, 'lon': -79.9958864}

In [243]:
def upload_city(city_id):
    """Upload the accumulated [timestamp, rank, aqi] records of one city to ESDR.

    Finds or creates the ESDR device (serial number = city id) and its feed,
    geocoding the city for the feed's lat/lon when the feed has to be
    created, then uploads ``accumulated_cities[city_id]`` on the channels
    ``rank`` and ``aqi``.  All problems are reported through ``Stat.warning``;
    nothing is raised for ESDR failures.

    Args:
        city_id: Airnow city id (int).

    Returns:
        ``False`` if an ESDR step failed (device, feed or upload), so a caller
        may choose to retry; ``True`` otherwise, including when the city id
        is unknown or has no accumulated data (both are only logged).
    """
    global esdr, highest_five_city_esdr_product
    if not esdr:
        esdr = Esdr('esdr-auth-airnow-uploader.json', user_agent='esdr-library.py['+STAT_SERVICE_NAME+']')
    if not highest_five_city_esdr_product:
        highest_five_city_esdr_product = get_highest_five_city_esdr_product()

    city_info = get_city_info(city_id)
    if not city_info:
        Stat.warning('Skipping upload of unknown city id %d' % (city_id), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return True

    print("Uploading city id [%d]" % city_id)

    city_and_state = "%s, %s [%d]" % (city_info['city'], city_info['state'], city_id)
    device = esdr.get_or_create_device(highest_five_city_esdr_product, serial_number=str(city_id), name=city_and_state)
    if not device:
        Stat.warning('Failed to find/create the ESDR device for city id %d' % (city_id), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return False

    feed = esdr.get_feed(device)
    if feed is None:
        # attempt to geocode; the feed is created without coordinates if that fails
        lat_lon = get_lat_lon_for_address(city_info['city'], city_info['state'])
        lat = lat_lon['lat'] if lat_lon else None
        lon = lat_lon['lon'] if lat_lon else None

        # create the feed
        feed = esdr.get_or_create_feed(device, lat=lat, lon=lon)
        if feed:
            # Only announce creation when a feed actually came back
            Stat.info('ESDR feed created for city id %d' % (city_id), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    if not feed:
        Stat.warning('%s: Failed to find/create the ESDR feed. Skipping.' % (device['name']), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return False

    if city_id not in accumulated_cities:
        Stat.warning('%s: No accumulated data found. Skipping.' % (device['name']), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return True

    records = accumulated_cities[city_id]
    try:
        esdr.upload(feed, {
            'channel_names': ['rank', 'aqi'],
            'data': records
        })
    except requests.HTTPError as e:
        Stat.warning('%s: Failed to upload %d records (HTTP %d)' % (device['name'], len(records), e.response.status_code), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return False
    except Exception:
        Stat.warning('%s: Failed to upload %d records' % (device['name'], len(records)), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        return False

    Stat.info('%s: Uploaded %d records' % (device['name'], len(records)), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    return True

In [244]:
def upload_ranking(ranking):
    """Placeholder for uploading one rank's accumulated records to ESDR.

    For now this only makes sure the shared ESDR client and the ranking
    product are initialised, then prints the rank being handled.

    Args:
        ranking: Rank index (int) used as key in ``accumulated_rankings``.
    """
    global esdr, highest_five_ranking_esdr_product
    esdr = esdr or Esdr('esdr-auth-airnow-uploader.json', user_agent='esdr-library.py['+STAT_SERVICE_NAME+']')
    highest_five_ranking_esdr_product = (
        highest_five_ranking_esdr_product
        or get_highest_five_ranking_esdr_product()
    )

    # TODO...
    print("Uploading ranking [%d]" % ranking)

In [245]:
def upload_accumulated():
    """Upload everything in the accumulators, then record the files as uploaded.

    Cities and rankings are uploaded in sorted key order.  The mtimes in
    ``accumulated_file_timestamps`` are written to
    ``uploaded_file_timestamps_db`` only if no uploader explicitly reported
    failure by returning ``False``; an uploader returning ``None`` (no status)
    counts as success.  When a failure was reported the files stay unrecorded,
    so a later run picks them up again instead of silently losing the data.
    The accumulators are cleared in either case.
    """
    global accumulated_cities, accumulated_rankings, uploaded_file_timestamps_db
    all_uploads_ok = True
    for city_id in sorted(accumulated_cities.keys()):
        if upload_city(city_id) is False:
            all_uploads_ok = False
    for ranking in sorted(accumulated_rankings.keys()):
        if upload_ranking(ranking) is False:
            all_uploads_ok = False

    if all_uploads_ok:
        for src in sorted(accumulated_file_timestamps):
            uploaded_file_timestamps_db[src] = accumulated_file_timestamps[src]
            Stat.debug('Uploaded %s to ESDR' % (src), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    elif accumulated_file_timestamps:
        # NOTE(review): a persistent failure causes these files to be re-read and
        # re-uploaded on every run until it is resolved.
        Stat.warning('Not recording %d file(s) as uploaded because an upload failed; they will be retried' % (len(accumulated_file_timestamps)), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    clear_accumulated()

In [246]:
def is_unmodified(src):
    """Return True if ``src`` still has the mtime recorded at its last upload.

    Args:
        src: Path of a data file.

    Returns:
        ``True`` when the file's current mtime equals the value stored under
        ``src`` in ``uploaded_file_timestamps_db``; ``False`` when it differs
        or when the file has no recorded upload.

    Raises:
        OSError: If ``src`` cannot be stat'ed.
    """
    global uploaded_file_timestamps_db
    current_mtime = os.path.getmtime(src)
    try:
        uploaded_mtime = uploaded_file_timestamps_db[src]
    except KeyError:
        # Never uploaded, hence not "unmodified since upload"
        return False
    return current_mtime == uploaded_mtime

In [247]:
def process_all():
    """Run one full pass: read new or changed data files and upload them.

    All ``[0-9]*.dat`` files in ``AirnowCommon.HIGHEST_FIVE_AQI_DAT_DIRECTORY``
    are visited in sorted order.  Files that ``is_unmodified`` reports as
    unchanged since their last upload are skipped; the others are parsed into
    the accumulators, which are flushed with ``upload_accumulated`` whenever
    ``NUM_FILES_PER_UPLOAD_BATCH`` files have been gathered and once more at
    the end.  Finally an "up" status with the elapsed time is reported.

    Raises:
        Whatever ``process_dat_file`` raises (e.g. too many parse errors);
        such an error aborts the pass.
    """
    # `time` is not among this file's own imports, so import it here rather
    # than rely on a helper notebook having put it into the globals.
    import time

    starting_timestamp = time.time()
    clear_accumulated()
    data_files = sorted(glob.glob(AirnowCommon.HIGHEST_FIVE_AQI_DAT_DIRECTORY + '/[0-9]*.dat'))
    Stat.info('Processing %d data files...' % (len(data_files)), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    for src in data_files:
        if len(accumulated_file_timestamps) == NUM_FILES_PER_UPLOAD_BATCH:
            upload_accumulated()
        try:
            if is_unmodified(src):
                continue
        except KeyError:
            # No upload recorded for this file yet: process it
            pass
        except Exception as e:
            # Stay best-effort (process the file anyway), but no longer silently
            Stat.warning('Failed to check whether %s is unmodified (%s). Processing it anyway.' % (src, e), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

        process_dat_file(src)
    upload_accumulated()
    ending_timestamp = time.time()
    Stat.up('Done processing %d data files' % (len(data_files)), details='Took %.1f seconds' % (ending_timestamp - starting_timestamp), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME, valid_for_secs=RUN_INTERVAL_SECONDS*1.5)

def process_all_forever():
    """Call process_all() repeatedly, sleeping between passes; never returns."""
    # NOTE(review): presumably the second argument offsets the wake-up to 60 seconds
    # into each RUN_INTERVAL_SECONDS period — verify against sleep_until_next_period.
    start_offset_seconds = 1*60
    while True:
        process_all()
        sleep_until_next_period(RUN_INTERVAL_SECONDS, start_offset_seconds)


# Run a single pass right away; process_all_forever() is defined above but not started here.
process_all()


Stat.log info Airnow Highest Five - Uploader hal21 Processing 2 data files... None


Stat.log debug Airnow Highest Five - Uploader hal21 Processing file ../../airnow-data/highest-five-aqi/dat/20200310.dat None


Stat.log debug Airnow Highest Five - Uploader hal21 Read 13 records from ../../airnow-data/highest-five-aqi/dat/20200310.dat (0 error(s)) None


Stat.log debug Airnow Highest Five - Uploader hal21 Processing file ../../airnow-data/highest-five-aqi/dat/20200311.dat None


Stat.log debug Airnow Highest Five - Uploader hal21 Read 2 records from ../../airnow-data/highest-five-aqi/dat/20200311.dat (0 error(s)) None


Uploading city id [8]
Creating device serialNumber 8, name Lakeland, FL [8]


Creating feed {'name': 'Lakeland, FL [8] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 28.0394654, 'longitude': -81.9498042}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 8 None


Stat.log info Airnow Highest Five - Uploader hal21 Lakeland, FL [8]: Uploaded 3 records None


Uploading city id [14]
Creating device serialNumber 14, name Tampa, FL [14]


Creating feed {'name': 'Tampa, FL [14] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 27.950575, 'longitude': -82.4571776}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 14 None


Stat.log info Airnow Highest Five - Uploader hal21 Tampa, FL [14]: Uploaded 4 records None


Uploading city id [18]
Creating device serialNumber 18, name Columbus-Phenix City - GA/AL, GA [18]


Creating feed {'name': 'Columbus-Phenix City - GA/AL, GA [18] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 32.4709761, 'longitude': -85.0007653}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 18 None


Stat.log info Airnow Highest Five - Uploader hal21 Columbus-Phenix City - GA/AL, GA [18]: Uploaded 2 records None


Uploading city id [71]
Creating device serialNumber 71, name Fall River, MA [71]


Creating feed {'name': 'Fall River, MA [71] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 41.7014912, 'longitude': -71.1550451}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 71 None


Stat.log info Airnow Highest Five - Uploader hal21 Fall River, MA [71]: Uploaded 6 records None


Uploading city id [104]
Creating device serialNumber 104, name Kansas City, MO [104]


Creating feed {'name': 'Kansas City, MO [104] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 39.0997265, 'longitude': -94.5785667}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 104 None


Stat.log info Airnow Highest Five - Uploader hal21 Kansas City, MO [104]: Uploaded 6 records None


Uploading city id [230]
Creating device serialNumber 230, name Beaumont-Port Arthur, TX [230]


Creating feed {'name': 'Beaumont-Port Arthur, TX [230] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 30.080174, 'longitude': -94.1265562}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 230 None


Stat.log info Airnow Highest Five - Uploader hal21 Beaumont-Port Arthur, TX [230]: Uploaded 3 records None


Uploading city id [232]
Creating device serialNumber 232, name Brownsville-McAllen, TX [232]


Creating feed {'name': 'Brownsville-McAllen, TX [232] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 25.9017472, 'longitude': -97.4974838}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 232 None


Stat.log info Airnow Highest Five - Uploader hal21 Brownsville-McAllen, TX [232]: Uploaded 3 records None


Uploading city id [234]
Creating device serialNumber 234, name Dallas-Fort Worth, TX [234]


Creating feed {'name': 'Dallas-Fort Worth, TX [234] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 32.7554883, 'longitude': -97.3307658}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 234 None


Stat.log info Airnow Highest Five - Uploader hal21 Dallas-Fort Worth, TX [234]: Uploaded 1 records None


Uploading city id [235]
Creating device serialNumber 235, name El Paso, TX [235]


Creating feed {'name': 'El Paso, TX [235] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 31.7618778, 'longitude': -106.4850217}


Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 235 None


Stat.log info Airnow Highest Five - Uploader hal21 El Paso, TX [235]: Uploaded 1 records None


Uploading city id [237]
Creating device serialNumber 237, name Laredo, TX [237]


Creating feed {'name': 'Laredo, TX [237] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 27.5035613, 'longitude': -99.5075519}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 237 None


Stat.log info Airnow Highest Five - Uploader hal21 Laredo, TX [237]: Uploaded 1 records None


Uploading city id [240]
Creating device serialNumber 240, name San Antonio, TX [240]


Creating feed {'name': 'San Antonio, TX [240] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 29.4241219, 'longitude': -98.49362819999999}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 240 None


Stat.log info Airnow Highest Five - Uploader hal21 San Antonio, TX [240]: Uploaded 6 records None


Uploading city id [243]
Creating device serialNumber 243, name Waco-Killeen, TX [243]


Creating feed {'name': 'Waco-Killeen, TX [243] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 31.5005289, 'longitude': -97.16554119999999}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 243 None


Stat.log info Airnow Highest Five - Uploader hal21 Waco-Killeen, TX [243]: Uploaded 4 records None


Uploading city id [360]
Creating device serialNumber 360, name Fort Lee, NJ [360]


Creating feed {'name': 'Fort Lee, NJ [360] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 40.8509333, 'longitude': -73.9701381}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 360 None


Stat.log info Airnow Highest Five - Uploader hal21 Fort Lee, NJ [360]: Uploaded 2 records None


Uploading city id [576]
Creating device serialNumber 576, name New Jersey, NJ [576]


Creating feed {'name': 'New Jersey, NJ [576] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 40.0583238, 'longitude': -74.4056612}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 576 None


Stat.log info Airnow Highest Five - Uploader hal21 New Jersey, NJ [576]: Uploaded 2 records None


Uploading city id [622]
Creating device serialNumber 622, name Colville, WA [622]


Creating feed {'name': 'Colville, WA [622] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 48.5449971, 'longitude': -117.9009545}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 622 None


Stat.log info Airnow Highest Five - Uploader hal21 Colville, WA [622]: Uploaded 2 records None


Uploading city id [701]
Creating device serialNumber 701, name Fairbanks, AK [701]


Creating feed {'name': 'Fairbanks, AK [701] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 64.8377778, 'longitude': -147.7163888}


Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 701 None


Stat.log info Airnow Highest Five - Uploader hal21 Fairbanks, AK [701]: Uploaded 2 records None


Uploading city id [757]


Creating device serialNumber 757, name High Elevations of Acadia National Park, ME [757]


Creating feed {'name': 'High Elevations of Acadia National Park, ME [757] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 44.3385559, 'longitude': -68.2733346}
Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 757 None


Stat.log info Airnow Highest Five - Uploader hal21 High Elevations of Acadia National Park, ME [757]: Uploaded 4 records None


Uploading city id [786]


Creating device serialNumber 786, name Lostwood, ND [786]


Creating feed {'name': 'Lostwood, ND [786] airnow_aqi_highest_five_city', 'exposure': 'outdoor', 'isPublic': 1, 'isMobile': 0, 'latitude': 48.4755803, 'longitude': -102.424064}


Stat.log info Airnow Highest Five - Uploader hal21 ESDR feed created for city id 786 None


Stat.log info Airnow Highest Five - Uploader hal21 Lostwood, ND [786]: Uploaded 2 records None


Uploading city id [789]
Creating device serialNumber 789, name Chester, CA [789]
