In [1]:
import json, time
import requests
import EPA_Requests as EPA

In [17]:
API_REQUEST_URL = 'https://aqs.epa.gov/data/api'
API_THROTTLE_WAIT = 0.1  # Throttle wait time between requests
EMAIL = 'kateroll@gmail.com'
USERNAME = 'kateroll@gmail.com'
APIKEY = 'silverfox66'

AQI_PARAMS_GASEOUS = ["O3", "NO2", "SO2"]
AQI_PARAMS_PARTICULATES = ["PM2.5", "PM10"]
EXTRACTION_FIELDS = ['sample_duration', 'observation_count', 'arithmetic_mean', 'aqi']

# API Endpoints
API_ACTION_LIST_CLASSES = '/list/classes?email={email}&key={key}'
API_ACTION_LIST_PARAMS = '/list/parametersByClass?email={email}&key={key}&pc={pclass}'
API_ACTION_LIST_SITES = '/list/sitesByCounty?email={email}&key={key}&state={state}&county={county}'
API_ACTION_MONITORS_COUNTY = '/monitors/byCounty?email={email}&key={key}&param={param}&bdate={begin_date}&edate={end_date}'
API_ACTION_MONITORS_BOX = '/monitors/byBox?email={email}&key={key}&param={param}&minlat={minlat}&maxlat={maxlat}&minlon={minlon}&maxlon={maxlon}&bdate={begin_date}&edate={end_date}'
API_ACTION_DAILY_SUMMARY_COUNTY = '/dailyData/byCounty?email={email}&key={key}&param={param}&state={state}&county={county}&bdate={begin_date}&edate={end_date}'

TALLAHASSEE_AQS_REQUEST_TEMPLATE = {
    "email":      EMAIL,     
    "key":        APIKEY,      
    "state":      "12",     # the two digit state FIPS # as a string
    "county":     "073",     # the three digit county FIPS # as a string
    "begin_date": "",     # the start of a time window in YYYYMMDD format
    "end_date":   "",     # the end of a time window in YYYYMMDD format, begin_date and end_date must be in the same year
    "minlat":    29.71366231884058,
    "maxlat":    31.162937681159423,
    "minlon":    -85.19645091575092,
    "maxlon":    -83.36494908424908,
    "param":     "",     # a list of comma separated 5 digit codes, max 5 codes requested
    "pclass":    ""      # parameter class is only used by the List calls
}

In [12]:
TEST_CITY_INFO = {'city_name'   : 'Seaside',
                  'county_name' : 'Clatsop',
                  'state_name'  : 'Oregon',
                  'state'       : '41',
                  'county'      : '007',
                  'fips'        : '41007',
                  'latlon' : [45.9932, -123.9226] }

In [3]:
def request_list_info(email_address = None, key = None,
                      endpoint_url = API_REQUEST_URL, 
                      endpoint_action = API_ACTION_LIST_CLASSES, 
                      request_template = AQS_REQUEST_TEMPLATE,
                      headers = None):
    
    #  Make sure we have email and key - at least
    #  This prioritizes the info from the call parameters - not what's already in the template
    if email_address:
        request_template['email'] = email_address
    if key:
        request_template['key'] = key
    
    # For the basic request we need an email address and a key
    if not request_template['email']:
        raise Exception("Must supply an email address to call 'request_list_info()'")
    if not request_template['key']: 
        raise Exception("Must supply a key to call 'request_list_info()'")

    # compose the request
    request_url = endpoint_url+endpoint_action.format(**request_template)
        
    # make the request
    try:
        # Wait first, to make sure we don't exceed a rate limit in the situation where an exception occurs
        # during the request processing - throttling is always a good practice with a free data source
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response

def request_monitors(email_address = None, key = None, param=None,
                          begin_date = None, end_date = None, fips = None,
                          endpoint_url = API_REQUEST_URL, 
                          endpoint_action = API_ACTION_MONITORS_COUNTY, 
                          request_template = AQS_REQUEST_TEMPLATE,
                          headers = None):
    
    #  This prioritizes the info from the call parameters - not what's already in the template
    if email_address:
        request_template['email'] = email_address
    if key:
        request_template['key'] = key
    if param:
        request_template['param'] = param
    if begin_date:
        request_template['begin_date'] = begin_date
    if end_date:
        request_template['end_date'] = end_date
    if fips and len(fips)==5:
        request_template['state'] = fips[:2]
        request_template['county'] = fips[2:]            

    # Make sure there are values that allow us to make a call - these are always required
    if not request_template['email']:
        raise Exception("Must supply an email address to call 'request_monitors()'")
    if not request_template['key']: 
        raise Exception("Must supply a key to call 'request_monitors()'")
    if not request_template['param']: 
        raise Exception("Must supply param values to call 'request_monitors()'")
    if not request_template['begin_date']: 
        raise Exception("Must supply a begin_date to call 'request_monitors()'")
    if not request_template['end_date']: 
        raise Exception("Must supply an end_date to call 'request_monitors()'")
    # Note we're not validating FIPS fields because not all of the monitors actions require the FIPS numbers
    
    # compose the request
    request_url = endpoint_url+endpoint_action.format(**request_template)
    
    # make the request
    try:
        # Wait first, to make sure we don't exceed a rate limit in the situation where an exception occurs
        # during the request processing - throttling is always a good practice with a free data source
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response

def request_daily_summary(email_address = None, key = None, param=None,
                          begin_date = None, end_date = None, fips = None,
                          endpoint_url = API_REQUEST_URL, 
                          endpoint_action = API_ACTION_DAILY_SUMMARY_COUNTY, 
                          request_template = AQS_REQUEST_TEMPLATE,
                          headers = None):
    
    #  This prioritizes the info from the call parameters - not what's already in the template
    if email_address:
        request_template['email'] = email_address
    if key:
        request_template['key'] = key
    if param:
        request_template['param'] = param
    if begin_date:
        request_template['begin_date'] = begin_date
    if end_date:
        request_template['end_date'] = end_date
    if fips and len(fips)==5:
        request_template['state'] = fips[:2]
        request_template['county'] = fips[2:]            

    # Make sure there are values that allow us to make a call - these are always required
    if not request_template['email']:
        raise Exception("Must supply an email address to call 'request_daily_summary()'")
    if not request_template['key']: 
        raise Exception("Must supply a key to call 'request_daily_summary()'")
    if not request_template['param']: 
        raise Exception("Must supply param values to call 'request_daily_summary()'")
    if not request_template['begin_date']: 
        raise Exception("Must supply a begin_date to call 'request_daily_summary()'")
    if not request_template['end_date']: 
        raise Exception("Must supply an end_date to call 'request_daily_summary()'")
    # Note we're not validating FIPS fields because not all of the daily summary actions require the FIPS numbers
        
    # compose the request
    request_url = endpoint_url+endpoint_action.format(**request_template)
        
    # make the request
    try:
        # Wait first, to make sure we don't exceed a rate limit in the situation where an exception occurs
        # during the request processing - throttling is always a good practice with a free data source
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response

def extract_summary_from_response(r=None, fields=EXTRACTION_FIELDS):
    ## the result will be structured around monitoring site, parameter, and then date
    result = dict()
    data = r["Data"]
    for record in data:
        # make sure the record is set up
        site = record['site_number']
        param = record['parameter_code']
        #date = record['date_local']    # this version keeps the respnse value YYYY-
        date = record['date_local'].replace('-','') # this puts it in YYYYMMDD format
        if site not in result:
            result[site] = dict()
            result[site]['local_site_name'] = record['local_site_name']
            result[site]['site_address'] = record['site_address']
            result[site]['state'] = record['state']
            result[site]['county'] = record['county']
            result[site]['city'] = record['city']
            result[site]['pollutant_type'] = dict()
        if param not in result[site]['pollutant_type']:
            result[site]['pollutant_type'][param] = dict()
            result[site]['pollutant_type'][param]['parameter_name'] = record['parameter']
            result[site]['pollutant_type'][param]['units_of_measure'] = record['units_of_measure']
            result[site]['pollutant_type'][param]['method'] = record['method']
            result[site]['pollutant_type'][param]['data'] = dict()
        if date not in result[site]['pollutant_type'][param]['data']:
            result[site]['pollutant_type'][param]['data'][date] = list()
        
        # now extract the specified fields
        extract = dict()
        for k in fields:
            if str(k) in record:
                extract[str(k)] = record[k]
            else:
                # this makes sure we always have the requested fields, even if
                # we have a missing value for a given day/month
                extract[str(k)] = None
        
        # add this extraction to the list for the day
        result[site]['pollutant_type'][param]['data'][date].append(extract)
    
    return result

def bounding_latlon(place=None,scale=1.0):
    minlat = place['latlon'][0] - float(scale) * LAT_25MILES
    maxlat = place['latlon'][0] + float(scale) * LAT_25MILES
    minlon = place['latlon'][1] - float(scale) * LON_25MILES
    maxlon = place['latlon'][1] + float(scale) * LON_25MILES
    return [minlat,maxlat,minlon,maxlon]

In [4]:
response = EPA.request_list_info()
response

Unnamed: 0,code,value_represented
0,AIRNOW MAPS,The parameters represented on AirNow maps (881...
1,ALL,Select all Parameters Available
2,AQI POLLUTANTS,Pollutants that have an AQI Defined
3,CORE_HAPS,Urban Air Toxic Pollutants
4,CRITERIA,Criteria Pollutants
5,CSN DART,List of CSN speciation parameters to populate ...
6,FORECAST,Parameters routinely extracted by AirNow (STI)
7,HAPS,Hazardous Air Pollutants
8,IMPROVE CARBON,IMPROVE Carbon Parameters
9,IMPROVE_SPECIATION,PM2.5 Speciated Parameters Measured at IMPROVE...


In [5]:
response = EPA.request_AQI_Pollutants()
response

Unnamed: 0,code,value_represented
0,42101,Carbon monoxide
1,42401,Sulfur dioxide
2,42602,Nitrogen dioxide (NO2)
3,44201,Ozone
4,81102,PM10 Total 0-10um STP
5,88101,PM2.5 - Local Conditions
6,88502,Acceptable PM2.5 AQI & Speciation Mass


In [6]:
response = EPA.get_monitoring_stations_by_county()
response

Unnamed: 0,code,value_represented
0,1,
1,2,
2,3,
3,4,
4,5,
5,6,
6,7,
7,8,
8,9,
9,10,


## Example 3. Making a daily summary request

The function below is designed to encapsulate requests to the EPA AQS API. When calling the function one should create/copy a parameter template, then initialize that template with values that won't change with each call. Then on each call simply pass in the parameters that need to change, like date ranges.

Another function below provides an example of extracting values and restructuring the response to make it a little more usable.

In [18]:
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_GASEOUS
request_data['state'] = TEST_CITY_INFO['fips'][:2]
request_data['county'] = TEST_CITY_INFO['fips'][2:]

# request daily summary data for the month of July in 2021
gaseous_aqi = request_daily_summary(request_template=request_data, begin_date="20210701", end_date="20210731")
print("Response for the gaseous pollutants ...")
#
if gaseous_aqi["Header"][0]['status'] == "Success":
    print(json.dumps(gaseous_aqi['Data'],indent=4))
elif gaseous_aqi["Header"][0]['status'].startswith("No data "):
    print("Looks like the response generated no data. You might take a closer look at your request and the response data.")
else:
    print(json.dumps(gaseous_aqi,indent=4))

Response for the gaseous pollutants ...
{
    "Header": [
        {
            "status": "Failed",
            "request_time": "2024-10-30T21:57:03.182-04:00",
            "url": "https://aqs.epa.gov/data/api/dailyData/byCounty?email=kateroll@gmail.com&key=silverfox66&param=%5B'O3',%20'NO2',%20'SO2'%5D&state=41&county=007&bdate=20210701&edate=20210731",
            "error": [
                "parameter code: ['O3', requires numeric value.",
                "parameter code:  'NO2', requires numeric value.",
                "parameter code:  'SO2'], requires numeric value."
            ]
        }
    ]
}


In [8]:
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['param'] = AQI_PARAMS_GASEOUS
# request daily summary data for the month of July in 2021
gaseous_aqi = request_daily_summary(request_template=request_data, begin_date="20210701", end_date="20210731")

In [9]:
gaseous_aqi

{'Header': [{'status': 'Failed',
   'request_time': '2024-10-30T21:47:13.746-04:00',
   'url': "https://aqs.epa.gov/data/api/dailyData/byCounty?email=kateroll@gmail.com&key=silverfox66&param=%5B'O3',%20'NO2',%20'SO2'%5D&state=12&county=073&bdate=20210701&edate=20210731",
   'error': ["parameter code: ['O3', requires numeric value.",
    "parameter code:  'NO2', requires numeric value.",
    "parameter code:  'SO2'], requires numeric value."]}]}

In [10]:
# request daily summary data for the month of July in 2021
gaseous_aqi = request_daily_summary(request_template=request_data, begin_date="20210901", end_date="20210931")
print("Response for the gaseous pollutants ...")
#
if gaseous_aqi["Header"][0]['status'] == "Success":
    print(json.dumps(gaseous_aqi['Data'],indent=4))
elif gaseous_aqi["Header"][0]['status'].startswith("No data "):
    print("Looks like the response generated no data. You might take a closer look at your request and the response data.")
else:
    print(json.dumps(gaseous_aqi,indent=4))

Response for the gaseous pollutants ...
{
    "Header": [
        {
            "status": "Failed",
            "request_time": "2024-10-30T21:48:04.205-04:00",
            "url": "https://aqs.epa.gov/data/api/dailyData/byCounty?email=kateroll@gmail.com&key=silverfox66&param=%5B'O3',%20'NO2',%20'SO2'%5D&state=12&county=073&bdate=20210901&edate=20210931",
            "error": [
                "parameter code: ['O3', requires numeric value.",
                "parameter code:  'NO2', requires numeric value.",
                "parameter code:  'SO2'], requires numeric value.",
                "edate: 20210931, requires the following format: yyyymmdd."
            ]
        }
    ]
}


In [10]:
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['param'] = AQI_PARAMS_GASEOUS

In [20]:
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['param'] = AQI_PARAMS_GASEOUS

# Initialize a list to hold all results
all_gaseous_aqi_data = []

# Define the start and end years for data retrieval
start_year = 1961
end_year = 2021

# Loop through each year and retrieve data for the date range May 1 - October 31
for year in range(start_year, end_year + 1):
    # Set the date range for May 1 - October 31
    may_oct_start = f"{year}0501"
    may_oct_end = f"{year}1031"

    # Request data for May 1 - October 31
    year_data = request_daily_summary(request_template=request_data,
                                      begin_date=may_oct_start,
                                      end_date=may_oct_end)

    # Check if data was returned and add it to the list
    if year_data is not None:
        all_gaseous_aqi_data.append({year: year_data})

    # Print status for each year's data retrieval
    print(f"Data retrieved for {year}")

Data retrieved for 1961
Data retrieved for 1962
Data retrieved for 1963
Data retrieved for 1964
Data retrieved for 1965
Data retrieved for 1966
Data retrieved for 1967
Data retrieved for 1968
Data retrieved for 1969
Data retrieved for 1970
Data retrieved for 1971
Data retrieved for 1972
Data retrieved for 1973
Data retrieved for 1974
Data retrieved for 1975
Data retrieved for 1976
Data retrieved for 1977
Data retrieved for 1978
Data retrieved for 1979
Data retrieved for 1980
Data retrieved for 1981
Data retrieved for 1982
Data retrieved for 1983
Data retrieved for 1984
Data retrieved for 1985
Data retrieved for 1986
Data retrieved for 1987
Data retrieved for 1988
Data retrieved for 1989
Data retrieved for 1990
Data retrieved for 1991
Data retrieved for 1992
Data retrieved for 1993
Data retrieved for 1994
Data retrieved for 1995
Data retrieved for 1996
Data retrieved for 1997
Data retrieved for 1998
Data retrieved for 1999
Data retrieved for 2000
Data retrieved for 2001
Data retrieved f

In [21]:
all_gaseous_aqi_data

[{1961: {'Header': [{'status': 'Failed',
     'request_time': '2024-10-30T21:08:46.816-04:00',
     'url': "https://aqs.epa.gov/data/api/dailyData/byCounty?email=kateroll@gmail.com&key=silverfox66&param=%5B'O3',%20'NO2',%20'SO2'%5D&state=12&county=073&bdate=19610501&edate=19611031",
     'error': ["parameter code: ['O3', requires numeric value.",
      "parameter code:  'NO2', requires numeric value.",
      "parameter code:  'SO2'], requires numeric value."]}]}},
 {1962: {'Header': [{'status': 'Failed',
     'request_time': '2024-10-30T21:08:47.281-04:00',
     'url': "https://aqs.epa.gov/data/api/dailyData/byCounty?email=kateroll@gmail.com&key=silverfox66&param=%5B'O3',%20'NO2',%20'SO2'%5D&state=12&county=073&bdate=19620501&edate=19621031",
     'error': ["parameter code: ['O3', requires numeric value.",
      "parameter code:  'NO2', requires numeric value.",
      "parameter code:  'SO2'], requires numeric value."]}]}},
 {1963: {'Header': [{'status': 'Failed',
     'request_time': 

In [15]:
request_data['param'] = AQI_PARAMS_PARTICULATES
# request daily summary data for the month of July in 2020
particulate_aqi = request_daily_summary(request_template=request_data, begin_date="20200701", end_date="20200731")
print("Response for the particulate pollutants ...")
#
if particulate_aqi["Header"][0]['status'] == "Success":
    print(json.dumps(particulate_aqi['Data'],indent=4))
elif particulate_aqi["Header"][0]['status'].startswith("No data "):
    print("Looks like the response generated no data. You might take a closer look at your request and the response data.")
else:
    print(json.dumps(particulate_aqi,indent=4))

Response for the particulate pollutants ...
{
    "Header": [
        {
            "status": "Failed",
            "request_time": "2024-10-30T20:50:37.445-04:00",
            "url": "https://aqs.epa.gov/data/api/dailyData/byCounty?email=kateroll@gmail.com&key=silverfox66&param=%5B'PM2.5',%20'PM10'%5D&state=12&county=073&bdate=20200701&edate=20200731",
            "error": [
                "parameter code: ['PM2.5', requires numeric value.",
                "parameter code:  'PM10'], requires numeric value."
            ]
        }
    ]
}


In [18]:
extract_summary_from_response(gaseous_aqi)

KeyError: 'Data'

In [21]:

request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_GASEOUS
request_data['state'] = CITY_LOCATIONS['loveland']['fips'][:2]
request_data['county'] = CITY_LOCATIONS['loveland']['fips'][2:]

# request daily summary data for the month of July in 2021
gaseous_aqi = request_daily_summary(request_template=request_data, begin_date="20210701", end_date="20210731")
print("Response for the gaseous pollutants ...")
#
if gaseous_aqi["Header"][0]['status'] == "Success":
    print(json.dumps(gaseous_aqi['Data'],indent=4))
elif gaseous_aqi["Header"][0]['status'].startswith("No data "):
    print("Looks like the response generated no data. You might take a closer look at your request and the response data.")
else:
    print(json.dumps(gaseous_aqi,indent=4))

request_data['param'] = AQI_PARAMS_PARTICULATES
# request daily summary data for the month of July in 2021
particulate_aqi = request_daily_summary(request_template=request_data, begin_date="20210701", end_date="20210731")
print("Response for the particulate pollutants ...")
#
if particulate_aqi["Header"][0]['status'] == "Success":
    print(json.dumps(particulate_aqi['Data'],indent=4))
elif particulate_aqi["Header"][0]['status'].startswith("No data "):
    print("Looks like the response generated no data. You might take a closer look at your request and the response data.")
else:
    print(json.dumps(particulate_aqi,indent=4))

Response for the gaseous pollutants ...
[
    {
        "state_code": "08",
        "county_code": "069",
        "site_number": "0007",
        "parameter_code": "44201",
        "poc": 1,
        "latitude": 40.27813,
        "longitude": -105.54564,
        "datum": "WGS84",
        "parameter": "Ozone",
        "sample_duration_code": "1",
        "sample_duration": "1 HOUR",
        "pollutant_standard": "Ozone 1-hour 1979",
        "date_local": "2021-07-01",
        "units_of_measure": "Parts per million",
        "event_type": "No Events",
        "observation_count": 24,
        "observation_percent": 100.0,
        "validity_indicator": "Y",
        "arithmetic_mean": 0.037542,
        "first_max_value": 0.049,
        "first_max_hour": 14,
        "aqi": null,
        "method_code": "047",
        "method": "INSTRUMENTAL - ULTRA VIOLET",
        "local_site_name": "Rocky Mountain NP - Long's Peak",
        "site_address": "ROCKY MOUNTAIN NP",
        "state": "Colorado",
   

The form of the daily summary response is a bit verbose with lots of repeated values. What we'll do is create a data structure that relies on a hierarchical context to summarize the data.

The two responses (for Bend, OR) show that not every monitoring site produces values. As well, it looks like the monitoring sites only produce values for particulates and not for gaseous pollutants.

The next function takes the response and a set of fields that should be extracted for their data values. The code assumes those fields are available. If there are missing values something could certainly go wrong. The function creates a summary for each monitoring site.

In [8]:
#
#    This is a list of field names - data - that will be extracted from each record
#
EXTRACTION_FIELDS = ['sample_duration','observation_count','arithmetic_mean','aqi']

#
#    The function creates a summary record
def extract_summary_from_response(r=None, fields=EXTRACTION_FIELDS):
    ## the result will be structured around monitoring site, parameter, and then date
    result = dict()
    data = r["Data"]
    for record in data:
        # make sure the record is set up
        site = record['site_number']
        param = record['parameter_code']
        #date = record['date_local']    # this version keeps the respnse value YYYY-
        date = record['date_local'].replace('-','') # this puts it in YYYYMMDD format
        if site not in result:
            result[site] = dict()
            result[site]['local_site_name'] = record['local_site_name']
            result[site]['site_address'] = record['site_address']
            result[site]['state'] = record['state']
            result[site]['county'] = record['county']
            result[site]['city'] = record['city']
            result[site]['pollutant_type'] = dict()
        if param not in result[site]['pollutant_type']:
            result[site]['pollutant_type'][param] = dict()
            result[site]['pollutant_type'][param]['parameter_name'] = record['parameter']
            result[site]['pollutant_type'][param]['units_of_measure'] = record['units_of_measure']
            result[site]['pollutant_type'][param]['method'] = record['method']
            result[site]['pollutant_type'][param]['data'] = dict()
        if date not in result[site]['pollutant_type'][param]['data']:
            result[site]['pollutant_type'][param]['data'][date] = list()
        
        # now extract the specified fields
        extract = dict()
        for k in fields:
            if str(k) in record:
                extract[str(k)] = record[k]
            else:
                # this makes sure we always have the requested fields, even if
                # we have a missing value for a given day/month
                extract[str(k)] = None
        
        # add this extraction to the list for the day
        result[site]['pollutant_type'][param]['data'][date].append(extract)
    
    return result

In [23]:

extract_gaseous = extract_summary_from_response(gaseous_aqi)
print("Summary of gaseous extraction ...")
print(json.dumps(extract_gaseous,indent=4))

extract_particulate = extract_summary_from_response(particulate_aqi)
print("Summary of particulate extraction ...")
print(json.dumps(extract_particulate,indent=4))


Summary of gaseous extraction ...
{
    "0007": {
        "local_site_name": "Rocky Mountain NP - Long's Peak",
        "site_address": "ROCKY MOUNTAIN NP",
        "state": "Colorado",
        "county": "Larimer",
        "city": "Not in a city",
        "pollutant_type": {
            "44201": {
                "parameter_name": "Ozone",
                "units_of_measure": "Parts per million",
                "method": "INSTRUMENTAL - ULTRA VIOLET",
                "data": {
                    "20210701": [
                        {
                            "sample_duration": "1 HOUR",
                            "observation_count": 24,
                            "arithmetic_mean": 0.037542,
                            "aqi": null
                        },
                        {
                            "sample_duration": "8-HR RUN AVG BEGIN HOUR",
                            "observation_count": 24,
                            "arithmetic_mean": 0.037292,
              

## Example 4. Making request by bounding box

There are some places that don't have monitoring stations. In the EPA FAQ that covers the AQS system, they note that their monitoring covers 2000 of the 3000+ US counties.

The AQS API has a mechanism of requesting data and monitoring stations using a geographic bounding box. The above examples just demonstrated the use of the AQS API for making requests by counties. The examples below illustrate the use of bounding boxes. The example below makes requests to identify monitoring stations within the bounding box. Once you knew you have monitoring stations, then the bounding box could be used in the daily summary requests to get AQS data.


In [24]:
#
#   These are rough estimates for creating bounding boxes based on a city location
#   You can find these rough estimates on the USGS website:
#   https://www.usgs.gov/faqs/how-much-distance-does-a-degree-minute-and-second-cover-your-maps
#
LAT_25MILES = 25.0 * (1.0/69.0)    # This is about 25 miles of latitude in decimal degrees
LON_25MILES = 25.0 * (1.0/54.6)    # This is about 25 miles of longitude in decimal degrees
#
#   Compute rough estimates for a bounding box around a given place
#   The bounding box is scaled in 50 mile increments. That is, the bounding box will have sides that
#   are rough multiples of 50 miles, with the center of the box around the indicated place.
#   The scale parameter determines the scale (size) of the bounding box
#
def bounding_latlon(place=None,scale=1.0):
    minlat = place['latlon'][0] - float(scale) * LAT_25MILES
    maxlat = place['latlon'][0] + float(scale) * LAT_25MILES
    minlon = place['latlon'][1] - float(scale) * LON_25MILES
    maxlon = place['latlon'][1] + float(scale) * LON_25MILES
    return [minlat,maxlat,minlon,maxlon]



In [18]:
#
#    This implements the monitors request. This requests monitoring stations. This can be done by state, county, or bounding box. 
#
#    Like the two other functions, this can be called with a mixture of a defined parameter dictionary, or with function
#    parameters. If function parameters are provided, those take precedence over any parameters from the request template.
#
def request_monitors(email_address = None, key = None, param=None,
                          begin_date = None, end_date = None, fips = None,
                          endpoint_url = API_REQUEST_URL, 
                          endpoint_action = API_ACTION_MONITORS_COUNTY, 
                          request_template = AQS_REQUEST_TEMPLATE,
                          headers = None):
    
    #  This prioritizes the info from the call parameters - not what's already in the template
    if email_address:
        request_template['email'] = email_address
    if key:
        request_template['key'] = key
    if param:
        request_template['param'] = param
    if begin_date:
        request_template['begin_date'] = begin_date
    if end_date:
        request_template['end_date'] = end_date
    if fips and len(fips)==5:
        request_template['state'] = fips[:2]
        request_template['county'] = fips[2:]            

    # Make sure there are values that allow us to make a call - these are always required
    if not request_template['email']:
        raise Exception("Must supply an email address to call 'request_monitors()'")
    if not request_template['key']: 
        raise Exception("Must supply a key to call 'request_monitors()'")
    if not request_template['param']: 
        raise Exception("Must supply param values to call 'request_monitors()'")
    if not request_template['begin_date']: 
        raise Exception("Must supply a begin_date to call 'request_monitors()'")
    if not request_template['end_date']: 
        raise Exception("Must supply an end_date to call 'request_monitors()'")
    # Note we're not validating FIPS fields because not all of the monitors actions require the FIPS numbers
    
    # compose the request
    request_url = endpoint_url+endpoint_action.format(**request_template)
    
    # make the request
    try:
        # Wait first, to make sure we don't exceed a rate limit in the situation where an exception occurs
        # during the request processing - throttling is always a good practice with a free data source
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.get(request_url, headers=headers)
        json_response = response.json()
    except Exception as e:
        print(e)
        json_response = None
    return json_response


In [19]:
#
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_PARTICULATES     # remember we have both gaseous and particulates
# 
#   We got the monitoring stations for Bend OR above (Deschutes county) - let's work with that one again
request_data['state'] = CITY_LOCATIONS['bend']['fips'][:2]
request_data['county'] = CITY_LOCATIONS['bend']['fips'][2:]
#
# the first example uses the default - request monitors by county, we'll just use a recent date for now
response = request_monitors(request_template=request_data, begin_date="20210701", end_date="20210731")
#
# the response should be similar to the 'list' request above - but in this case we should only get monitors that
# monitor the AQI_PARAMS_PARTICULATES set of params.
#
if response["Header"][0]['status'] == "Success":
    print(json.dumps(response['Data'],indent=4))
else:
    print(json.dumps(response,indent=4))


[
    {
        "state_code": "41",
        "county_code": "017",
        "site_number": "0120",
        "parameter_code": "88101",
        "poc": 1,
        "parameter_name": "PM2.5 - Local Conditions",
        "open_date": "2001-03-01",
        "close_date": null,
        "concurred_exclusions": null,
        "dominant_source": "AREA",
        "measurement_scale": "NEIGHBORHOOD",
        "measurement_scale_def": "500 M TO 4KM",
        "monitoring_objective": "POPULATION EXPOSURE",
        "last_method_code": "209",
        "last_method_description": "Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM2.5C - Beta Attenuation",
        "last_method_begin_date": "2023-09-30",
        "naaqs_primary_monitor": "Y",
        "qa_primary_monitor": null,
        "monitor_type": "SPM",
        "networks": null,
        "monitoring_agency_code": "0821",
        "monitoring_agency": "Oregon Department Of Environmental Quality",
        "si_id": 13113,
        "latitude": 44.063916,
        "longitud


Given the response above for Bend, OR - it looks like there are 5 monitoring sites that are in Deschutes County - AND that monitor particulate air quality. Their unique site IDs are given in the 'site_number' field of the dictionary. The response also includes their lat,lon positions should we need that. Comparing the site numbers to the response from the list request above - all of the sites here are there. So, things seem to be heading in the right direction.



Next, we're going to change the monitors request to use a bounding box and see what happens.


In [20]:
#
#    Create a copy of the AQS_REQUEST_TEMPLATE
#
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_PARTICULATES     # same particulate request as the one abover
# 
#   Not going to use these - comment them out
#request_data['state'] = CITY_LOCATIONS['bend']['fips'][:2]
#request_data['county'] = CITY_LOCATIONS['bend']['fips'][2:]
#
#   Now, we need bounding box parameters

#   50 mile box
bbox = bounding_latlon(CITY_LOCATIONS['bend'],scale=1.0)
#   100 mile box
#bbox = bounding_latlon(CITY_LOCATIONS['bend'],scale=2.0)
#   150 mile box
#bbox = bounding_latlon(CITY_LOCATIONS['bend'],scale=3.0)
#   200 mile box
#bbox = bounding_latlon(CITY_LOCATIONS['bend'],scale=4.0)

# the bbox response comes back as a list - [minlat,maxlat,minlon,maxlon]

#   put our bounding box into the request_data
request_data['minlat'] = bbox[0]
request_data['maxlat'] = bbox[1]
request_data['minlon'] = bbox[2]
request_data['maxlon'] = bbox[3]

#
#   we need to change the action for the API from the default to the bounding box - same recent date for now
response = request_monitors(request_template=request_data, begin_date="20210701", end_date="20210731",
                            endpoint_action = API_ACTION_MONITORS_BOX)
#
#
#
if response["Header"][0]['status'] == "Success":
    print(json.dumps(response['Data'],indent=4))
else:
    print(json.dumps(response,indent=4))


[
    {
        "state_code": "41",
        "county_code": "017",
        "site_number": "0120",
        "parameter_code": "88101",
        "poc": 1,
        "parameter_name": "PM2.5 - Local Conditions",
        "open_date": "2001-03-01",
        "close_date": null,
        "concurred_exclusions": null,
        "dominant_source": "AREA",
        "measurement_scale": "NEIGHBORHOOD",
        "measurement_scale_def": "500 M TO 4KM",
        "monitoring_objective": "POPULATION EXPOSURE",
        "last_method_code": "209",
        "last_method_description": "Met One BAM-1022 Mass Monitor w/ VSCC or TE-PM2.5C - Beta Attenuation",
        "last_method_begin_date": "2023-09-30",
        "naaqs_primary_monitor": "Y",
        "qa_primary_monitor": null,
        "monitor_type": "SPM",
        "networks": null,
        "monitoring_agency_code": "0821",
        "monitoring_agency": "Oregon Department Of Environmental Quality",
        "si_id": 13113,
        "latitude": 44.063916,
        "longitud


The response above indicates that all five of the particulate monitoring stations in Deschutes County are within (roughly) 25 miles of Bend, OR. Our 50 mile bounding box was centered on Bend - and we got all 5 stations.

There are places where monitoring stations might be hard to find. This would mean that air quality data is harder to get for that location.

Lets, consider Seaside, OR.


In [21]:
#
request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_PARTICULATES     # like the above example, just focusing on particulates
# 
#    This time, we'll try getting the list of monitoring stations for Seaside, OR
request_data['state'] = CITY_LOCATIONS['seaside']['fips'][:2]
request_data['county'] = CITY_LOCATIONS['seaside']['fips'][2:]
#
#    For the Seaside example, let's get everything that we can find in the county.
response = request_monitors(request_template=request_data, begin_date="20210701", end_date="20210731")
#
#
if response["Header"][0]['status'] == "Success":
    print(json.dumps(response['Data'],indent=4))
elif response["Header"][0]['status'].startswith("No data "):
    print("Looks like the response generated no monitoring stations.")
else:
    print(json.dumps(response,indent=4))


Looks like the response generated no monitoring stations.



The response indicates that there are no monitoring stations in Clatsop County (where Seaside, OR is located). The EPA says they only have monitoring stations in about 2000 of the 3000+ US counties. If we're going to find monitoring stations - maybe we can use the bounding box to find them.

Let's scale up some bounding boxes and see if we can find some monitoring stations for Seaside OR.


In [22]:

request_data = AQS_REQUEST_TEMPLATE.copy()
request_data['email'] = USERNAME
request_data['key'] = APIKEY
request_data['param'] = AQI_PARAMS_PARTICULATES     # same particulate request as the one abover
# 
#   Not going to use these - comment them out
#request_data['state'] = CITY_LOCATIONS['seaside']['fips'][:2]
#request_data['county'] = CITY_LOCATIONS['seaside']['fips'][2:]
#
#   Now, we need bounding box parameters

#   50 mile box - roughly within 25 miles of the location
#bbox = bounding_latlon(CITY_LOCATIONS['seaside'],scale=1.0)
#   100 mile box - roughly within 50 miles of the location
bbox = bounding_latlon(CITY_LOCATIONS['seaside'],scale=2.0)
#   150 mile box - roughly within 75 miles of the location
#bbox = bounding_latlon(CITY_LOCATIONS['seaside'],scale=3.0)
#   200 mile box
#bbox = bounding_latlon(CITY_LOCATIONS['seaside'],scale=4.0)

# the bbox response comes back as a list - [minlat,maxlat,minlon,maxlon]

#   put our bounding box into the request_data
request_data['minlat'] = bbox[0]
request_data['maxlat'] = bbox[1]
request_data['minlon'] = bbox[2]
request_data['maxlon'] = bbox[3]

#
#   we need to change the action for the API from the default to the bounding box - same recent date for now
response = request_monitors(request_template=request_data, begin_date="20210701", end_date="20210731",
                            endpoint_action = API_ACTION_MONITORS_BOX)
#
#
#
if response["Header"][0]['status'] == "Success":
    print(json.dumps(response['Data'],indent=4))
else:
    print(json.dumps(response,indent=4))


[
    {
        "state_code": "41",
        "county_code": "067",
        "site_number": "0006",
        "parameter_code": "88502",
        "poc": 3,
        "parameter_name": "Acceptable PM2.5 AQI & Speciation Mass",
        "open_date": "2019-06-20",
        "close_date": null,
        "concurred_exclusions": null,
        "dominant_source": "AREA",
        "measurement_scale": "NEIGHBORHOOD",
        "measurement_scale_def": "500 M TO 4KM",
        "monitoring_objective": "POPULATION EXPOSURE",
        "last_method_code": "791",
        "last_method_description": "OTHR AUTOMATD 2.5 MASS CONCENT - SURROGATE MEASURE",
        "last_method_begin_date": "2019-06-20",
        "naaqs_primary_monitor": null,
        "qa_primary_monitor": null,
        "monitor_type": "SPM",
        "networks": null,
        "monitoring_agency_code": "0821",
        "monitoring_agency": "Oregon Department Of Environmental Quality",
        "si_id": 104536,
        "latitude": 45.52483,
        "longitude": 


Looks like we can find two monitoring stations within 50 miles of Seaside, OR. If we needed to know the air quality in Seaside, we could use a Daily Summary request with the same bounding box and get data for these two stations - which are the closest ones we could find.
