# Mirror Monitoring Site Locations

Mirrors the Airnow monitoring site locations .dat file (https://files.airnowtech.org/airnow/today/monitoring_site_locations.dat) and also creates a JSON version of it.

Reports to stat.createlab.org as `Airnow Monitoring Site Locations File - Mirror`.

In [77]:
import json, os, dateutil, re, requests, subprocess, datetime, glob, stat

from dateutil import rrule, tz, parser

In [78]:
# Boilerplate to load utils.ipynb
# See https://github.com/CMU-CREATE-Lab/python-utils/blob/master/utils.ipynb


def exec_ipynb(filename_or_url):
    """Execute every code cell of a Jupyter notebook in this module's globals.

    The notebook is fetched with ``requests`` when ``filename_or_url`` starts
    with ``http:`` or ``https:``; otherwise it is read from the local
    filesystem.  The source of all code cells is concatenated, written to a
    uniquely named file under /tmp, compiled with that file name, and exec'd
    so that everything the notebook defines becomes a global here.

    NOTE: this runs arbitrary code from the notebook; only point it at
    notebooks you trust.

    Args:
        filename_or_url: Path or http(s) URL of the .ipynb file to execute.
    """
    if re.match(r'https?:', filename_or_url):
        nb = requests.get(filename_or_url).json()
    else:
        # Use a context manager so the handle is closed deterministically, and
        # read as UTF-8 (the encoding Jupyter writes) instead of relying on
        # the process locale.
        with open(filename_or_url, encoding='utf-8') as nb_file:
            nb = json.load(nb_file)

    # nbformat >= 4 keeps cells at the top level under 'source'; older formats
    # nest them in the first worksheet under 'input'.
    if nb['nbformat'] >= 4:
        cells, source_key = nb['cells'], 'source'
    else:
        cells, source_key = nb['worksheets'][0]['cells'], 'input'
    src = '\n\n\n'.join(''.join(cell[source_key])
                        for cell in cells if cell['cell_type'] == 'code')

    # Timestamp + pid keep concurrent runs from clobbering each other's file.
    tmpname = '/tmp/%s-%s-%d.py' % (os.path.basename(filename_or_url),
                                    datetime.datetime.now().strftime('%Y%m%d%H%M%S%f'),
                                    os.getpid())
    # Make sure the source is fully written and closed before it is compiled
    # under that file name.
    with open(tmpname, 'w', encoding='utf-8') as tmp_file:
        tmp_file.write(src)
    code = compile(src, tmpname, 'exec')
    exec(code, globals())


# Execute the shared notebooks so their definitions become globals here
# (presumably Stat, sleep_until_next_period and AirnowCommon, which are used
# below but not defined in this file -- verify against those notebooks).
exec_ipynb('./python-utils/utils.ipynb')
exec_ipynb('./airnow-common.ipynb')

In [79]:
# How often the mirror runs: every 15 minutes, expressed in seconds.
MIRROR_TIME_PERIOD_SECS = 15 * 60

# Identity passed to the Stat reporting calls below.
STAT_HOSTNAME = 'airnow'
STAT_SHORTNAME = 'airnow-mirror-monitoring-site-locations-file'
STAT_SERVICE_NAME = 'Airnow Monitoring Site Locations File - Mirror'

# Names of the mirrored source file and of the JSON file derived from it.
MONITORING_SITE_LOCATIONS_DAT_FILENAME = 'monitoring_site_locations.dat'
MONITORING_SITE_LOCATIONS_JSON_FILENAME = 'monitoring_site_locations.json'

In [80]:
# Register the service name with Stat before any Stat.* reporting call below.
Stat.set_service(STAT_SERVICE_NAME)

In [81]:
def jsonify_monitoring_site_locations():
    """Convert the mirrored monitoring_site_locations.dat file into JSON.

    Reads the pipe-delimited .dat file from AirnowCommon.DATA_DIRECTORY and
    writes monitoring_site_locations.json into the same directory.  The JSON
    document has two top-level keys:

    * ``sites``: maps each AQSID to that site's fields (taken from the first
      line seen for the site, minus "parameter name") plus a ``parameters``
      list of every parameter name seen for the site.
    * ``param_to_site_ids``: maps each parameter name to the list of AQSIDs
      that have it.

    Lines with an unexpected field count and repeated (AQSID, parameter)
    pairs are reported through Stat.warning and skipped.  The JSON file is
    made readable by everyone and given the .dat file's modification time.

    Raises:
        OSError: if the .dat file cannot be read or the JSON file cannot be
            written; a partially written temporary file is removed first.
    """
    field_names = ('AQSID|parameter name|site code|site name|status|' +
                   'agency id|agency name|EPA region|latitude|longitude|' +
                   'elevation|GMT offset|country code|CMSA code|CMSA name|' +
                   'MSA code|MSA name|state code|state name|county code|' +
                   'county name|city code|city name').split('|')

    source = AirnowCommon.DATA_DIRECTORY + '/' + MONITORING_SITE_LOCATIONS_DAT_FILENAME
    dest = AirnowCommon.DATA_DIRECTORY + '/' + MONITORING_SITE_LOCATIONS_JSON_FILENAME

    # monitoring_site_locations.dat has non-ASCII characters, in the original
    # IBM PC 8-bit charset known today as code page 437.  Decode to unicode
    # while reading, and close the handle as soon as the read is done.
    # Test: 000050121 is PARC OCÉANIE
    with open(source, 'r', encoding='cp437') as dat_file:
        data = dat_file.read()

    sites = {}
    param_to_site_ids = {}

    for line in data.split('\n'):
        line = line.strip()
        if not line:
            continue
        fields = line.split('|')
        if len(field_names) != len(fields):
            Stat.warning('Monitoring site has %d field names but %d fields. Skipping.' % (len(field_names), len(fields)), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
            continue
        channel_info = dict(zip(field_names, fields))
        aqsid = channel_info['AQSID']
        # The parameter name is stored in the site's 'parameters' list, not
        # as a field of the site itself.
        parameter = channel_info.pop('parameter name')

        # First line for this site: its fields become the site record.
        site = sites.get(aqsid)
        if site is None:
            channel_info['parameters'] = []
            site = sites[aqsid] = channel_info

        # An AQSID is in param_to_site_ids[parameter] exactly when the
        # parameter is in that site's own (short) parameters list, so one
        # membership test on the short list guards both structures and avoids
        # scanning the long per-parameter AQSID list on every line.
        if parameter in site['parameters']:
            Stat.warning('Skipping duplicate in monitoring_site_locations: %s:%s' % (aqsid, parameter), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
            continue
        site['parameters'].append(parameter)
        param_to_site_ids.setdefault(parameter, []).append(aqsid)

    sites_and_params = {'sites': sites, 'param_to_site_ids': param_to_site_ids}

    Stat.debug('Read %d sites and %d params from %s' % (len(sites),
                                                        len(param_to_site_ids),
                                                        source),
               host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    # Write to a pid-suffixed temp file, give it its final permissions and
    # timestamps, and only then move it into place, so readers never observe
    # a partial file or one with the wrong mode/mtime.
    tmp = dest + '.tmp' + str(os.getpid())
    os.makedirs(os.path.dirname(tmp), exist_ok=True)
    try:
        with open(tmp, 'w') as json_file:
            json.dump(sites_and_params, json_file, sort_keys=True)

        # make the JSON file readable by everyone (rw-r--r--)
        os.chmod(tmp, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)

        # make the JSON file's access/modification times match the .dat's mtime
        source_mtime = os.stat(source).st_mtime
        os.utime(tmp, (source_mtime, source_mtime))

        os.replace(tmp, dest)
    except Exception:
        # Don't leave a stray temp file behind when anything above fails.
        if os.path.exists(tmp):
            os.remove(tmp)
        raise

    Stat.info('Successfully created %s ' % MONITORING_SITE_LOCATIONS_JSON_FILENAME, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)


#jsonify_monitoring_site_locations()

In [84]:
def mirror_monitoring_site_locations():
    """Mirror the latest monitoring_site_locations.dat and refresh its JSON copy.

    Calls AirnowCommon.mirror_airnow_file for today's .dat file.  When a new
    copy was downloaded, the result message is reported with Stat.info and the
    JSON version is regenerated.  Otherwise the message is routed by status
    code: 304 is only printed, other codes below 400 go to Stat.info, and
    everything else goes to Stat.warning.
    """
    # Latest file is at https://files.airnowtech.org/airnow/today/monitoring_site_locations.dat
    (is_new, message, status_code) = AirnowCommon.mirror_airnow_file(
        'today' + '/' + MONITORING_SITE_LOCATIONS_DAT_FILENAME,
        AirnowCommon.DATA_DIRECTORY + '/' + MONITORING_SITE_LOCATIONS_DAT_FILENAME)

    if is_new:
        Stat.info(message, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        jsonify_monitoring_site_locations()
    elif status_code == 304:
        # Unchanged file (presumably HTTP 304 Not Modified): use print instead
        # of Stat to reduce noise to stat.createlab.org.  The extra Stat.info
        # call that used to follow this print defeated that purpose.
        print(message)
    elif status_code < 400:
        Stat.info(message, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    else:
        Stat.warning(message, host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

# Run a single mirroring pass now, without the Stat.up bracketing that mirror() adds.
mirror_monitoring_site_locations()

Local mirror of https://files.airnowtech.org/airnow/today/monitoring_site_locations.dat is up to date.  Skipping.


In [86]:
def mirror():
    """Run one mirroring pass, reporting 'up' to Stat before and after it.

    The closing report passes valid_for_secs of 1.5 times the mirror period.
    """
    dat_name = MONITORING_SITE_LOCATIONS_DAT_FILENAME

    Stat.up('Mirroring the Airnow %s file...' % dat_name,
            host=STAT_HOSTNAME,
            shortname=STAT_SHORTNAME)

    mirror_monitoring_site_locations()

    Stat.up('Done mirroring the Airnow %s file' % dat_name,
            host=STAT_HOSTNAME,
            shortname=STAT_SHORTNAME,
            valid_for_secs=MIRROR_TIME_PERIOD_SECS * 1.5)

def mirror_forever():
    """Run mirror() in an endless loop, pausing between passes.

    Never returns.  There is no exception handling here, so an exception
    raised by mirror() propagates and ends the loop.
    """
    while True:
        mirror()
        # Helper is not defined in this file (presumably from utils.ipynb);
        # by its name it waits for the next MIRROR_TIME_PERIOD_SECS boundary
        # -- TODO confirm.
        sleep_until_next_period(MIRROR_TIME_PERIOD_SECS)

# Run a single pass here; switch to the commented-out mirror_forever() call
# to keep mirroring in a loop instead.
#mirror_forever()
mirror()

Stat.log up Airnow Monitoring Site Locations File - Mirror airnow Mirroring the Airnow monitoring_site_locations.dat file... None


Local mirror of https://files.airnowtech.org/airnow/today/monitoring_site_locations.dat is up to date.  Skipping.
Stat.log up Airnow Monitoring Site Locations File - Mirror airnow Done mirroring the Airnow monitoring_site_locations.dat file None
