# Collected archived AirNow Data

Use the API to download AirNow data from selected stations.

# Load Python packages

In [1]:
import pandas as pd
import datetime
import requests  # for url requests
import json  # for json reading/writing
import os
import time

# NOTE: you must sign up for your own EPA AQS API key and put your
# account email and that key, separated by a comma, in ../api_key.txt
# (for example: someone@example.com,mykey).
with open("../api_key.txt") as f:
    # Strip each part: the file normally ends with a newline, which would
    # otherwise become part of the key sent to the API.
    email, pwd = (part.strip() for part in f.read().split(","))

# Access EPA API

In [2]:
def get_payload(keys, fname, url='https://aqs.epa.gov/data/api/sampleData/bySite?'):
    """Run one sample-data query and store the result under ``../Data``.

    The raw ``Data`` section of the response is written as JSON to
    ``../Data/<fname>``. When it contains rows, a CSV with the same name
    (``.json`` replaced by ``.csv``) is written next to it: a block of
    ``## KEY : value`` metadata lines followed by a ``date`` /
    ``sample_measurement`` table sorted by date.

    Parameters
    ----------
    keys : dict
        Query parameters sent with the request. Must contain ``param``,
        ``bdate``, ``edate``, ``state``, ``county`` and ``site``; any other
        entries (e.g. ``email`` and ``key``) are passed through unchanged.
    fname : str
        Path of the JSON output relative to ``../Data``. Surrounding
        whitespace (such as a trailing newline) is ignored.
    url : str
        Endpoint to query.

    Returns
    -------
    str or None
        ``None`` when the CSV was created, otherwise the ``[error]`` or
        ``[warn]`` message explaining why it was not. Every message is
        also printed.
    """
    fname = fname.strip()

    # Look the fields up by name, before the try block: the messages below
    # (including the one in the except handler) can then always be built and
    # no longer depend on the insertion order of `keys`.
    bdate, edate, state, county, site = (
        keys.get(k) for k in ('bdate', 'edate', 'state', 'county', 'site')
    )
    query = f"(start={bdate}, end={edate}, state={state}, county={county}, site={site})"

    missing = [
        k for k in ('param', 'bdate', 'edate', 'state', 'county', 'site')
        if k not in keys
    ]
    if missing:
        # Do not spend a request on a query that cannot be valid.
        payload_status = (
            f"[error]: UNABLE TO PROCESS {query} > NOT CREATED {fname} "
            f"(reason: missing query keys: {', '.join(missing)})"
        )
        print(payload_status)
        return payload_status

    print(f"[info]: START processing {query} > {fname}")
    try:
        query_AirNow = requests.get(url, keys).json()
        header = query_AirNow['Header'][0]

        if header['status'] == 'Failed':
            reason = header['error'][0].strip()
            payload_status = (
                f"[error]: UNABLE TO PROCESS {query} > NOT CREATED {fname} "
                f"(reason: '{reason}')"
            )
        else:
            json_path = f"../Data/{fname}"
            csv_path = json_path.replace(".json", ".csv")
            os.makedirs(os.path.dirname(json_path), exist_ok=True)

            # The raw payload is saved even when it turns out to be empty.
            with open(json_path, "w") as fo:
                json.dump(query_AirNow['Data'], fo)
            data_extract = pd.DataFrame(query_AirNow['Data'])

            if data_extract.empty:
                payload_status = (
                    f"[warn]: UNABLE TO PROCESS {query} > NOT CREATED {fname} "
                    f"([empty data] reason: {header['status']})"
                )
            else:
                # Metadata header for the csv file, taken from the first row
                metadata = ['site_number', 'latitude', 'longitude', 'method_type', 'method', 'state', 'county']
                metadata_d = data_extract[metadata].iloc[0].to_dict()
                metadata_d['date_start'] = bdate
                metadata_d['date_end'] = edate

                # Header and table must go to the same file: write the
                # header first, then let pandas append the table to it.
                with open(csv_path, "w") as fo:
                    fo.write(
                        '\n'.join([f'## {k.upper()} : {v}' for k, v in metadata_d.items()])
                    )
                    fo.write('\n')

                output_data = pd.DataFrame({
                    'date': pd.to_datetime(data_extract['date_gmt'] + ' ' + data_extract['time_gmt']),
                    'sample_measurement': data_extract['sample_measurement'],
                })
                output_data.sort_values(by='date').to_csv(csv_path, index=False, mode='a')

                print(f"[info]: DONE processing {query} > CREATED {fname}")
                payload_status = None
    except Exception as e:
        # Best effort: one failed query must not stop a batch run, so the
        # failure is reported to the caller instead of being raised.
        payload_status = (
            f"[error]: UNABLE TO PROCESS {query} > NOT CREATED {fname} "
            f"(reason: {e})"
        )

    if payload_status:
        print(payload_status)
    return payload_status

# Set up query keys and error response allocation

In [6]:
# Timestamp (YYYYMMDDHHMMSS) used to name this run's error log
now = datetime.datetime.now()
timestamp = now.strftime("%Y%m%d%H%M%S")
url = 'https://aqs.epa.gov/data/api/sampleData/bySite?'

# Each non-blank line of AirNow_payloads.txt after the header describes one
# query: param,bdate,edate,state,county,site,fname
with open("AirNow_payloads.txt") as f, \
     open(f"{timestamp}_payload_errors.txt", "w") as f_err:
    f.readline()  # skip header
    for line in f:
        if not line.strip():
            continue

        record = line.rstrip("\r\n")
        # Strip every field: the last one would otherwise keep the line's
        # trailing newline, and padded values would be sent to the API as-is.
        fields = [field.strip() for field in record.split(",")]
        if len(fields) != 7:
            # Log the bad line and carry on instead of aborting the batch.
            problem = f"[error]: MALFORMED LINE (expected 7 comma-separated fields, got {len(fields)})"
            print(problem)
            f_err.write(f"{record}\n\t{problem}\n\n")
            continue

        param, bdate, edate, state, county, site, fname = fields
        keys = {
            'email': email, #for access to the api
            'key': pwd, #for access to the api
            'param': param, #88500 ("PM2.5 Total Atmospheric" for CU Athens) or 88101 ("PM2.5 Local Conditions" for Longmont)
            'bdate': bdate, #begin date (modify as needed, YYYYMMDD)
            'edate': edate, #end date (modify as needed, YYYYMMDD)
            'state': state, #Colorado
            'county': county, #County
            'site': site, #1001 (for CU Athens) or 0003 (for Longmont)
        }

        payload = get_payload(keys, fname, url)
        if payload:
            # get_payload returns a message only when no csv was created
            f_err.write(f"{record}\n\t{payload}\n\n")

        time.sleep(1.25)  # pause between consecutive API requests

[info]: START processing (start=20190101, end=20191231, state=08, county=097, site=0008) > Denver/ContinMonitors/ASPENMyst2019_payload.json
[warn]: UNABLE TO PROCESS (start=20190101, end=20191231, state=08, county=097, site=0008) > NOT CREATED Denver/ContinMonitors/ASPENMyst2019_payload.json ([empty data] reason: No data matched your selection)
