# Mirror Reporting Area Locations

Mirrors the Airnow reporting area locations .dat file (https://files.airnowtech.org/airnow/today/reporting_area_locations_V2.dat) and also creates a JSON version of it.

Airnow updates the .dat file twice per hour, at 25 and 55 minutes past the hour. This mirror runs on the hour and on the half hour, i.e. five minutes after each scheduled update.

The fact sheet describing the file is located at https://docs.airnowapi.org/docs/ReportingAreaInformationFactSheet.pdf

Reports to stat.createlab.org as `Airnow Reporting Area Locations File - Mirror`.

In [0]:
import json, os, dateutil, re, requests, subprocess, datetime, glob, stat

from dateutil import rrule, tz, parser

In [0]:
# Boilerplate to load utils.ipynb
# See https://github.com/CMU-CREATE-Lab/python-utils/blob/master/utils.ipynb


def exec_ipynb(filename_or_url):
    """Execute every code cell of a Jupyter notebook in this module's globals.

    The notebook JSON is fetched with ``requests`` when ``filename_or_url``
    starts with ``http:`` or ``https:``; otherwise it is read from the local
    filesystem.  The source of all code cells is joined, written to a uniquely
    named ``.py`` file under ``/tmp``, compiled with that file name, and
    executed with ``globals()`` -- so every name the notebook defines becomes a
    global of this module.

    Args:
        filename_or_url: Local path or http(s) URL of the ``.ipynb`` file.
    """
    if re.match(r'https?:', filename_or_url):
        nb = requests.get(filename_or_url).json()
    else:
        # Context manager so the notebook's file handle is closed right away
        # instead of lingering until garbage collection.
        with open(filename_or_url) as nb_file:
            nb = json.load(nb_file)

    if nb['nbformat'] >= 4:
        src = [''.join(cell['source']) for cell in nb['cells'] if cell['cell_type'] == 'code']
    else:
        # Older notebook formats nest the cells inside the first worksheet and
        # keep the code under 'input' rather than 'source'.
        src = [''.join(cell['input']) for cell in nb['worksheets'][0]['cells'] if cell['cell_type'] == 'code']

    # Timestamp (to the microsecond) plus pid keeps the temp file name unique.
    tmpname = '/tmp/%s-%s-%d.py' % (os.path.basename(filename_or_url),
                                    datetime.datetime.now().strftime('%Y%m%d%H%M%S%f'),
                                    os.getpid())
    src = '\n\n\n'.join(src)

    # The combined source is saved under the same name handed to compile()
    # (presumably so tracebacks can display the offending source lines).
    with open(tmpname, 'w') as tmp_file:
        tmp_file.write(src)

    code = compile(src, tmpname, 'exec')
    exec(code, globals())


# Run the code cells of both notebooks in this module's globals.  Later cells use
# Stat, AirnowCommon and sleep_until_next_period without defining them, so those
# names presumably come from these notebooks (TODO confirm which defines which).
exec_ipynb('./python-utils/utils.ipynb')
exec_ipynb('./airnow-common.ipynb')

In [0]:
# Period handed to sleep_until_next_period() between mirroring passes; 1.5x this
# value is also passed as valid_for_secs with the "up" status report
MIRROR_TIME_PERIOD_SECS = 60 * 30   # every 30 minutes

# Stat reporting identity: the service name is given to Stat.set_service(), while
# the hostname and short name are passed along with every individual Stat call
STAT_SERVICE_NAME = 'Airnow Reporting Area Locations File - Mirror'
STAT_HOSTNAME = 'hal21'
STAT_SHORTNAME = 'airnow-mirror-reporting-area-locations-file'

# File names of the mirrored .dat file and of the JSON file derived from it; both
# are placed directly under AirnowCommon.DATA_DIRECTORY
REPORTING_AREA_LOCATIONS_DAT_FILENAME = 'reporting_area_locations_V2.dat'
REPORTING_AREA_LOCATIONS_JSON_FILENAME = 'reporting_area_locations.json'

In [0]:
# Tell Stat which service name to use.
# NOTE(review): presumably this applies to all later Stat reports from this process -- confirm in utils.ipynb
Stat.set_service(STAT_SERVICE_NAME)

In [0]:
def jsonify_locations():
    """Create the JSON version of the mirrored reporting area locations .dat file.

    Reads the pipe-delimited .dat file from ``AirnowCommon.DATA_DIRECTORY`` and
    builds a map keyed by ``countryCode|stateCode|name`` whose values hold the
    remaining fields of interest.  Records with an unexpected number of fields,
    and records whose key has already been seen, are skipped with a Stat
    warning.  The map is written as JSON into the same directory; the JSON file
    is world-readable and its access/modification times are set to the .dat
    file's modification time.

    The JSON is written to a temporary file, given its final permissions and
    times, and only then renamed over the destination, so readers never observe
    a partially written or not-yet-readable file.
    """
    field_names = ('name|stateCode|countryCode|forecasts|actionDayName|lat|lng|gmtOffset|hasDST|tzLabel|dstzLabel|twcCode|usaToday|forecastSource').split('|')

    # fields present in the .dat that are left out of the JSON
    unwanted_field_names = ('forecasts', 'actionDayName', 'usaToday', 'forecastSource')

    reporting_areas = {}

    # The file may have non-ASCII characters, in the archaic Original IBM PC 8-bit charset
    # known today as Code page 437.  Translate to unicode during read
    source = AirnowCommon.DATA_DIRECTORY + '/' + REPORTING_AREA_LOCATIONS_DAT_FILENAME
    dest = AirnowCommon.DATA_DIRECTORY + '/' + REPORTING_AREA_LOCATIONS_JSON_FILENAME
    with open(source, 'r', encoding='cp437') as dat_file:
        data = dat_file.read()

    for line in data.split('\n'):
        line = line.strip()
        if len(line) == 0:
            continue
        fields = [field.strip() for field in line.split('|')]    # split on | then strip whitespace from every field
        if len(field_names) != len(fields):
            Stat.warning('Record has %d field names but %d fields. Skipping.' % (len(field_names), len(fields)), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
            continue
        field_map = dict(zip(field_names, fields))

        # the key must be built before any fields are removed from the map
        key = '|'.join((field_map['countryCode'], field_map['stateCode'], field_map['name']))

        # delete keys we don't need
        for unwanted in unwanted_field_names:
            field_map.pop(unwanted, None)

        # convert Yes/No to a real boolean; any other value is left untouched
        if field_map['hasDST'] == 'Yes':
            field_map['hasDST'] = True
        elif field_map['hasDST'] == 'No':
            field_map['hasDST'] = False
            field_map.pop('dstzLabel', None)  # no point including the daylight savings time label if they don't do DST

        # keep only the first record seen for each reporting area key
        if key not in reporting_areas:
            reporting_areas[key] = field_map
        else:
            Stat.warning('skipping duplicate key [%s]' % (key), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    Stat.debug('Read %d locations from %s' % (len(reporting_areas), source), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    # write the JSON to a temp file next to the destination
    tmp = dest + '.tmp' + str(os.getpid())
    os.makedirs(os.path.dirname(tmp), exist_ok=True)
    with open(tmp, 'w') as json_file:
        json.dump(reporting_areas, json_file, sort_keys=True)

    # Finish preparing the temp file BEFORE it replaces the destination, so the
    # published file always has its final permissions and timestamps.

    # make the JSON file readable by everyone
    os.chmod(tmp, stat.S_IREAD | stat.S_IWRITE | stat.S_IRGRP | stat.S_IROTH)

    # make the JSON file's file stat times match those of the .dat
    source_file_stat = os.stat(source)
    os.utime(tmp, (source_file_stat.st_mtime, source_file_stat.st_mtime))

    # publish: the rename keeps the mode and times set above
    os.rename(tmp, dest)

    Stat.info('Successfully created %s ' % REPORTING_AREA_LOCATIONS_JSON_FILENAME, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

#jsonify_locations()

In [0]:
def mirror_file():
    """Mirror the latest .dat file and regenerate the JSON when it has changed.

    Delegates the download to ``AirnowCommon.mirror_airnow_file`` and reports
    the returned message to Stat.  When the mirrored file is new, the JSON
    version is rebuilt.  Otherwise the message is logged as info for a 304 or
    any other status code below 400, and as a warning for everything else.
    """
    # Latest file is at https://files.airnowtech.org/airnow/today/reporting_area_locations_V2.dat
    remote_path = 'today' + '/' + REPORTING_AREA_LOCATIONS_DAT_FILENAME
    local_path = AirnowCommon.DATA_DIRECTORY + '/' + REPORTING_AREA_LOCATIONS_DAT_FILENAME
    is_new, message, status_code = AirnowCommon.mirror_airnow_file(remote_path, local_path)

    if is_new:
        Stat.info(message, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        jsonify_locations()
        return

    # nothing new: only status codes of 400 and above are worth a warning
    report = Stat.info if (status_code == 304 or status_code < 400) else Stat.warning
    report(message, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

#mirror_file()

In [0]:
def mirror():
    """Run one mirroring pass, bracketed by Stat reports.

    Logs an info message, mirrors the file, then sends an "up" report that
    includes the elapsed time and is valid for 1.5 mirror periods.
    """
    began_at = datetime.datetime.now().timestamp()

    start_message = 'Mirroring the Airnow %s file...' % REPORTING_AREA_LOCATIONS_DAT_FILENAME
    Stat.info(start_message, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    mirror_file()

    finished_at = datetime.datetime.now().timestamp()
    done_message = 'Done mirroring the Airnow %s file (elapsed time: %d seconds)' % (
        REPORTING_AREA_LOCATIONS_DAT_FILENAME,
        finished_at - began_at,
    )
    Stat.up(
        done_message,
        host=STAT_HOSTNAME,
        shortname=STAT_SHORTNAME,
        valid_for_secs=MIRROR_TIME_PERIOD_SECS*1.5,
    )

def mirror_forever():
    """Run mirror() in an endless loop, pausing between passes.

    The loop has no exit condition, so this function only ends if mirror() or
    sleep_until_next_period() raises.
    """
    while True:
        mirror()
        # NOTE(review): sleep_until_next_period is defined outside this file; presumably
        # it blocks until the next MIRROR_TIME_PERIOD_SECS boundary -- confirm
        sleep_until_next_period(MIRROR_TIME_PERIOD_SECS)

# Entry point: start mirroring.  mirror_forever() loops endlessly, so this call
# does not return normally and nothing placed after it would run.
mirror_forever()