# Airnow Highest 10 Notifications

This script runs every five minutes and sends email notifications to a small set of people whenever any of a configured set of reporting areas appears in, changes position in, or leaves the Highest 10.  The users and reporting areas of concern are defined in the JSON file `highest-10-notifications-config.json`.  The format of that configuration file is shown below; reporting area IDs can be found in `reporting_areas.json`:

```
{
   "alertableReportingAreas" : {
      "pa003" : {
         "emails" : ["foo@bar.com"]
      },
      "pa005" : {
         "emails" : ["a@z.com", "foo@bar.com"]
      }
   }
}
```

This script uses SendGrid to send emails and depends on a configuration file named `sendgrid-config.json` in the current directory.  Format for that file is:

```
{
   "api_key" : "YOUR_API_KEY_HERE"
}
```

Reports to stat.createlab.org as `Airnow Highest Ten - Notifications`.

In [0]:
import json, os, dateutil, re, requests, subprocess, datetime, glob, stat
from dateutil import rrule, tz, parser
from python_http_client.exceptions import HTTPError

# using SendGrid's Python Library
# https://github.com/sendgrid/sendgrid-python
from sendgrid import *
from sendgrid.helpers.mail import Mail, From, To, Subject, PlainTextContent, HtmlContent, SendGridException

In [0]:
# Boilerplate to load utils.ipynb
# See https://github.com/CMU-CREATE-Lab/python-utils/blob/master/utils.ipynb

def exec_ipynb(filename_or_url):
    """Execute every code cell of a Jupyter notebook in this module's globals.

    The notebook is fetched over HTTP(S) when ``filename_or_url`` starts with
    ``http:`` or ``https:``; otherwise it is read from the local filesystem.
    Both the v4+ layout (``cells`` / ``source``) and the older layout
    (``worksheets[0].cells`` / ``input``) are supported.

    The concatenated source is also written to a uniquely named file under
    ``/tmp`` and that path is used as the compile filename, so the source that
    was executed can be inspected afterwards.

    Args:
        filename_or_url: Path or http(s) URL of the ``.ipynb`` file.

    Note:
        The notebook's code runs with full privileges via ``exec``; only load
        notebooks from trusted locations.
    """
    if re.match(r'https?:', filename_or_url):
        nb = requests.get(filename_or_url).json()
    else:
        # Use a context manager so the notebook file handle is always closed.
        with open(filename_or_url) as nb_file:
            nb = json.load(nb_file)

    if nb['nbformat'] >= 4:
        src = [''.join(cell['source']) for cell in nb['cells'] if cell['cell_type'] == 'code']
    else:
        src = [''.join(cell['input']) for cell in nb['worksheets'][0]['cells'] if cell['cell_type'] == 'code']

    # Name includes the notebook basename, a microsecond timestamp and the PID
    # so that concurrent loaders do not overwrite each other's file.
    tmpname = '/tmp/%s-%s-%d.py' % (os.path.basename(filename_or_url),
                                    datetime.datetime.now().strftime('%Y%m%d%H%M%S%f'),
                                    os.getpid())
    src = '\n\n\n'.join(src)

    # Make sure the source is flushed and closed on disk before it is compiled
    # and executed.
    with open(tmpname, 'w') as tmp_file:
        tmp_file.write(src)

    code = compile(src, tmpname, 'exec')
    exec(code, globals())

# Execute the code cells of the shared notebooks into this module's globals.
# NOTE(review): names used below but not defined in this file (Stat,
# AirnowCommon, time, sleep_until_next_period) presumably come from these two
# notebooks -- confirm against their contents.
exec_ipynb('./python-utils/utils.ipynb')
exec_ipynb('./airnow-common.ipynb')

In [0]:
# Identity used when reporting status via Stat: the service name is registered
# with Stat.set_service(), and the host/short name are passed on every Stat call.
STAT_SERVICE_NAME = 'Airnow Highest Ten - Notifications'
STAT_HOSTNAME = 'hal21'
STAT_SHORTNAME = 'airnow-highest-ten-notifications'

# Period between runs; also used to derive how long an "up" report stays valid.
RUN_INTERVAL_SECONDS = 60 * 5   # every 5 minutes

# JSON file describing the alertable reporting areas and who to email for each
# (re-read at the start of every run).
CONFIG_FILE = './highest-10-notifications-config.json'

# File name (within AirnowCommon.DATA_DIRECTORY) of the reporting area metadata.
REPORTING_AREAS_JSON_FILENAME = 'reporting_areas.json'

# import the SendGrid config file
# This is read once at load time; the file must contain an "api_key" entry,
# which send_email() uses to create the SendGrid client.
SENDGRID_CONFIG_FILE = './sendgrid-config.json'
SENDGRID_CONFIG = {}
with open(SENDGRID_CONFIG_FILE, 'r') as f:
    SENDGRID_CONFIG = json.load(f)

#print(SENDGRID_CONFIG['api_key'])

In [0]:
# Register this script's service name with Stat before any status is reported.
# NOTE(review): Stat is not defined in this file; presumably provided by utils.ipynb.
Stat.set_service(STAT_SERVICE_NAME)

In [0]:
# Reporting area metadata keyed by reporting area ID; replaced by read_reporting_areas().
reporting_areas = {}

# Parsed contents of CONFIG_FILE; replaced by read_config_file().
notifications_config = {}

# Raw rankings portion of the most recently processed data line, used by
# check_current_rankings() to detect whether anything changed since the last run.
previous_rankings_str = ""
# Map of alertable reporting area ID -> rank as of the last call to build_alerts()
# that received a non-None ranking map.
previous_alertable_reporting_area_rankings = {}

# List of dicts (rank, reporting_area_id, reporting_area_name, aqi) rebuilt by
# check_current_rankings() and rendered into the email body by send_email().
current_rankings = []

In [0]:
def read_reporting_areas():
    """Reload the reporting area metadata from disk into ``reporting_areas``.

    The file is ``REPORTING_AREAS_JSON_FILENAME`` inside
    ``AirnowCommon.DATA_DIRECTORY``. The module-level ``reporting_areas`` is
    replaced wholesale with the parsed JSON.
    """
    global reporting_areas
    json_path = '/'.join((AirnowCommon.DATA_DIRECTORY, REPORTING_AREAS_JSON_FILENAME))
    with open(json_path, 'r') as areas_file:
        raw_json = areas_file.read()
    reporting_areas = json.loads(raw_json)

# read_reporting_areas()
# print(json.dumps(reporting_areas['pa003'], sort_keys=True, indent=3))

In [0]:
def get_reporting_area_name(reporting_area_id):
    """Return a display name of the form ``"<name>, <stateCode>"`` for a reporting area.

    The lookup is against the module-level ``reporting_areas`` map. IDs that
    are not present yield the literal ``"Unknown Reporting Area"``.
    """
    try:
        area = reporting_areas[reporting_area_id]
    except KeyError:
        return "Unknown Reporting Area"

    return ', '.join((area['name'], area['stateCode']))

# print(get_reporting_area_name('pa003'))   # Pittsburgh, PA
# print(get_reporting_area_name('pa005'))   # Liberty-Clairton Area, PA
# print(get_reporting_area_name('ar002'))   # Springdale (Springdale-Fayetteville-Bentonville), AR
# print(get_reporting_area_name('bogus'))   # Unknown Reporting Area

In [0]:
def read_config_file():
    """Reload ``CONFIG_FILE`` from disk into the module-level ``notifications_config``."""
    global notifications_config
    with open(CONFIG_FILE, 'r') as config_file:
        raw_json = config_file.read()
    notifications_config = json.loads(raw_json)

# read_config_file()
# print(json.dumps(notifications_config, sort_keys=True, indent=3))

In [0]:
def is_alertable_reporting_area(reporting_area_id):
    """Return True when the given ID is listed under ``alertableReportingAreas``.

    The ID is converted with ``str()`` before the lookup, so non-string values
    (including ``None``) are accepted and simply compared by their string form.
    """
    area_key = str(reporting_area_id)
    alertable_areas = notifications_config['alertableReportingAreas']
    return area_key in alertable_areas

# print(is_alertable_reporting_area(None))
# print(is_alertable_reporting_area(162))
# print(is_alertable_reporting_area("pa003"))
# print(is_alertable_reporting_area("pa005"))
# print(is_alertable_reporting_area("bogus"))

In [0]:
def get_email_addresses_to_notify_for_reporting_area(reporting_area_id):
    """Return the configured ``emails`` list for a reporting area.

    The ID is converted with ``str()`` before the lookup. An empty list is
    returned when the area is not listed under ``alertableReportingAreas``.
    """
    area_key = str(reporting_area_id)
    alertable_areas = notifications_config['alertableReportingAreas']
    if area_key not in alertable_areas:
        return []

    return alertable_areas[area_key]['emails']

# print(get_email_addresses_to_notify_for_reporting_area("pa003"))
# print(get_email_addresses_to_notify_for_reporting_area("pa005"))
# print(get_email_addresses_to_notify_for_reporting_area("bogus"))
# print(get_email_addresses_to_notify_for_reporting_area(None))

In [0]:
def check_current_rankings():
    """Read the newest Highest Ten data file and extract the alertable rankings.

    The newest file is the lexicographically greatest ``[0-9]*.dat`` file in
    ``AirnowCommon.HIGHEST_TEN_AQI_DAT_DIRECTORY``; only its last line is used.

    Side effects:
        * ``current_rankings`` is reset to ``[]`` and, if the rankings changed,
          refilled with one dict per ranking (``rank``, ``reporting_area_id``,
          ``reporting_area_name``, ``aqi``).
        * ``previous_rankings_str`` is updated to the new raw rankings string
          whenever it differs from the stored one.

    Returns:
        A dict mapping alertable reporting area ID -> rank when the rankings
        string differs from the previous run (the dict may be empty), or
        ``None`` when there is no data file, the file is empty, the rankings
        are unchanged, or the line could not be parsed.
    """
    global previous_rankings_str, current_rankings

    files = glob.glob(AirnowCommon.HIGHEST_TEN_AQI_DAT_DIRECTORY + '/[0-9]*.dat')
    if not files:
        return None
    last_file = max(files)
    Stat.debug('Most recent data file is %s' % (last_file), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    # reset the current rankings
    current_rankings = []

    # Read the last line of the file.  Using the loop variable itself means an
    # empty file leaves last_line as None instead of raising UnboundLocalError.
    last_line = None
    with open(last_file, 'r') as f:
        for last_line in f:
            pass

    if not last_line:
        return None

    # Record format example: 1615312200.1391587:1,mo002,118|2,nm001,103|3,ms010,103|4,wi012,100|5,wa048,99|6,wi003,99|7,mi009,96|8,mi005,96|9,oh007,95|10,mt003,93
    # A colon separates the Unix timestamp from the rankings.  Rankings are pipe delimited and there should typically exist 10 per timestamp.
    # A ranking item consists of three comma-delimited values: the rank index [1-10], the Airnow reporting area ID, and the AQI
    # Note that there's actually no guarantee that there will be exactly 10 rankings.  I've seen cases where there are eleven, and it's possible there could be fewer.
    try:
        Stat.debug('Most recent rankings: %s' % (last_line), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

        (timestamp, rankings) = last_line.split(':')
        # The timestamp is not used further; converting it only validates the record.
        timestamp = float(timestamp)

        # Do a quickie string comparison to make sure the rankings have actually changed.  If so, then
        # we'll do a deeper check later to make sure the alertable rankings have changed, but ignoring
        # changes in only AQI.  It's just the rankings we care about.
        if rankings == previous_rankings_str:
            Stat.debug('Rankings unchanged, nothing to do', host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
            return None

        Stat.debug('Rankings have changed, checking for alertable reporting areas', host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        previous_rankings_str = rankings

        # parse the rankings, building a map of reporting_area_id to rank
        alertable_reporting_area_rankings = {}
        for ranking in rankings.split('|'):
            (rank, reporting_area_id, aqi) = ranking.split(',')
            rank = int(rank)
            aqi = int(aqi)   # int() tolerates the trailing newline on the final item

            current_rankings.append({"rank": rank,
                                     "reporting_area_id": reporting_area_id,
                                     "reporting_area_name": get_reporting_area_name(reporting_area_id),
                                     "aqi": aqi})

            # only bother remembering it if it's an alertable reporting area
            if is_alertable_reporting_area(reporting_area_id):
                alertable_reporting_area_rankings[reporting_area_id] = rank

        return alertable_reporting_area_rankings

    except Exception:
        # Deliberately best-effort: a malformed record must not stop the
        # service.  Catching Exception (rather than a bare except) still lets
        # KeyboardInterrupt / SystemExit propagate.
        Stat.warning('Failed to parse most recent rankings. Skipping.', host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    return None

#check_current_rankings()

In [0]:
def build_alerts(alertable_reporting_area_rankings):
    """Diff the current alertable rankings against the previous ones.

    Args:
        alertable_reporting_area_rankings: dict of reporting area ID -> rank,
            or ``None`` when there is nothing to compare.

    Returns:
        A dict of reporting area ID -> ``{'brief': ..., 'verbose': ...}`` with
        one entry for every area that dropped out, entered, or changed rank.
        Empty when the argument is ``None`` or nothing changed.

    When the argument is not ``None`` it becomes the new
    ``previous_alertable_reporting_area_rankings`` for the next call.
    """
    global previous_alertable_reporting_area_rankings

    if alertable_reporting_area_rankings is None:
        return {}

    alerts = {}
    before = previous_alertable_reporting_area_rankings
    now = alertable_reporting_area_rankings

    # areas that were ranked last time but are missing now have dropped out
    for area_id in before:
        if area_id in now:
            continue
        name = get_reporting_area_name(area_id)
        alerts[area_id] = {
            'brief': "%s: --> out" % (name),
            'verbose': "%s is no longer in the Highest Ten" % (name),
        }

    # areas ranked now are either newcomers or may have moved
    for area_id, new_rank in now.items():
        name = get_reporting_area_name(area_id)

        if area_id not in before:
            alerts[area_id] = {
                'brief': "%s: --> %d" % (name, new_rank),
                'verbose': "%s has entered at rank %d" % (name, new_rank),
            }
            continue

        old_rank = before[area_id]
        if old_rank != new_rank:
            alerts[area_id] = {
                'brief': "%s: %d --> %d" % (name, old_rank, new_rank),
                'verbose': "%s has changed from rank %d to rank %d" % (name, old_rank, new_rank),
            }

    previous_alertable_reporting_area_rankings = now
    return alerts

In [0]:
def send_email(to_address, alerts):
    """Email one recipient a summary of their alerts plus the current rankings.

    Args:
        to_address: Recipient email address.
        alerts: List of alert dicts, each with ``'brief'`` (used in the
            subject line) and ``'verbose'`` (used in the body) entries.

    Returns:
        The response object returned by ``SendGridAPIClient.send`` on success
        (a warning is also reported for status codes >= 300), or ``None`` when
        ``alerts`` is empty or sending raised an exception.

    Failures are reported through ``Stat.warning`` and never propagate, so one
    bad send does not stop the remaining notifications.
    """
    if not alerts:
        return None

    if len(alerts) == 1:
        area_or_areas = "A reporting area"
        has_or_have = "has"
    else:
        area_or_areas = "Reporting areas"
        has_or_have = "have"

    # Assemble the body from parts and join once instead of repeated +=.
    body_parts = [area_or_areas + " you're watching " + has_or_have + " new activity in the Airnow Highest Ten:\n\n"]
    body_parts.extend("   %s\n" % (alert['verbose']) for alert in alerts)
    body_parts.append("\nThe current rankings are:\n\n")
    body_parts.extend("   %2d: %s   (AQI %d)\n" % (ranking['rank'], ranking['reporting_area_name'], ranking['aqi'])
                      for ranking in current_rankings)
    body_parts.append("\nView details at https://airstats.createlab.org/highest-ten/")
    email_body = ''.join(body_parts)

    from_email = From("no-reply@airstats.createlab.org", "Airnow Highest Ten Alerts")
    to_email = To(to_address)
    subject = Subject('[AHTA] ' + ' | '.join(alert['brief'] for alert in alerts))
    content = PlainTextContent(email_body)

    resp = None

    try:
        message = Mail(from_email=from_email,
                       to_emails=to_email,
                       subject=subject,
                       plain_text_content=content)

        sg = SendGridAPIClient(SENDGRID_CONFIG['api_key'])
        resp = sg.send(message)
        print('Status code [%d] while sending email to [%s]' % (resp.status_code, to_address))

        if resp.status_code >= 300:
            Stat.warning('Status code [%d] while trying to send email to [%s]' % (resp.status_code, to_address), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    except HTTPError as he:
        print(he.to_dict)
        Stat.warning('HTTPError [%s] while trying to send email to [%s]' % (he, to_address), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    except Exception as e:
        # Exceptions have no `.message` attribute on Python 3; reading it here
        # would raise AttributeError from inside the handler.  Format the
        # exception itself instead.
        print(e)
        Stat.warning('Exception [%s] while trying to send email to [%s]' % (e, to_address), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    return resp

# send_email("bartley@cmu.edu", [{'brief': 'Hidden Valley, AZ: --> 1', 'verbose': 'Hidden Valley, AZ has entered at rank 1'}])
# send_email("bartley@cmu.edu", [{'brief': 'Hidden Valley, AZ: --> 1', 'verbose': 'Hidden Valley, AZ has entered at rank 1'}, {'brief': 'Mississippi Gulf Coast, MS: --> 3', 'verbose': 'Mississippi Gulf Coast, MS has entered at rank 3'}])

In [0]:
def send_alerts(alerts_by_reporting_area):
    """Group alerts by recipient and send each recipient a single email.

    Args:
        alerts_by_reporting_area: dict of reporting area ID -> alert dict
            (with at least a ``'brief'`` entry). Nothing happens when empty.

    Recipients come from ``get_email_addresses_to_notify_for_reporting_area``;
    a recipient subscribed to several affected areas receives one email that
    contains all of their alerts.
    """
    area_count = len(alerts_by_reporting_area)
    if area_count <= 0:
        return

    area_noun = "area" if (area_count == 1) else "areas"
    Stat.debug('Sending alerts for %d reporting %s' % (area_count, area_noun), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)

    # bucket the alerts by the address that should receive them
    pending_by_address = {}
    for area_id, alert in alerts_by_reporting_area.items():
        Stat.debug("Reporting Area ID [%s]: %s" % (area_id, alert['brief']), host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
        for address in get_email_addresses_to_notify_for_reporting_area(area_id):
            pending_by_address.setdefault(address, []).append(alert)

    # one email per recipient
    for address, pending in pending_by_address.items():
        print("Sending these alerts to email [%s]:" % (address))
        for alert in pending:
            print("   %s" % alert['brief'])
        send_email(address, pending)

In [0]:
def run():
    """Perform one notification pass.

    Reloads the reporting area metadata and the notification config, checks
    the newest rankings, builds and sends any resulting alerts, and finally
    reports an "up" status (valid for 1.5x the run interval) with the elapsed
    time.
    """
    Stat.info('Checking Highest 10 for changes which would trigger notifications...', host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    started_at = time.time()

    # Both files are re-read on every pass so edits take effect without a restart.
    read_reporting_areas()
    read_config_file()

    # Map of alertable reporting areas in the current highest 10, or None
    # when check_current_rankings() has nothing new to report.
    alertable_now = check_current_rankings()

    if alertable_now is None:
        Stat.debug('None of the reporting areas in the highest 10 require notification', host=STAT_HOSTNAME, shortname=STAT_SHORTNAME)
    else:
        print(json.dumps(alertable_now, sort_keys=True, indent=3))
        pending_alerts = build_alerts(alertable_now)
        send_alerts(pending_alerts)

    finished_at = time.time()
    Stat.up('Done with Highest 10 notifications!',
            details='Took %.1f seconds' % (finished_at - started_at),
            host=STAT_HOSTNAME,
            shortname=STAT_SHORTNAME,
            valid_for_secs=RUN_INTERVAL_SECONDS*1.5)

def run_forever():
    """Call ``run()`` repeatedly, sleeping between passes; never returns.

    NOTE(review): ``sleep_until_next_period`` is not defined in this file; the
    second argument is presumably an offset in seconds so that passes start at
    1 minute after the hour -- confirm against its definition.
    """
    offset_seconds = 1*60  # start at 1 minutes after the hour
    while True:
        run()
        sleep_until_next_period(RUN_INTERVAL_SECONDS, offset_seconds)

# Entry point: start the notification loop as soon as this cell/script executes.
# This call does not return.
run_forever()