In [1]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
import datetime
from pathlib import Path # System agnostic paths

import requests # for url requests
import json # for json reading/writing
import time # for epoch timestamp
import csv # for writing csv files

import os

# NOTE: you must sign up for your own EPA AQS API key (the requests below go
# to aqs.epa.gov) and store it, together with the e-mail address it was
# registered under, in ../api_key.txt as one comma-separated line:
#     <email>,<key>

with open("../api_key.txt") as f:
    # Strip whitespace around each part: most editors end the file with a
    # newline, which would otherwise become part of the key sent to the API.
    email, pwd = (part.strip() for part in f.read().split(","))

In [2]:
url = 'https://aqs.epa.gov/data/api/sampleData/bySite?'

# Download one sample-data payload per row of AirNow_payloads.txt.
#
# After a header line, every row is expected to hold seven comma-separated
# fields: param, bdate, edate, state, county, site, fname.
# For each row the 'Data' list of the JSON response is saved to
# ../data/<fname>; when that list is non-empty, a CSV with a '##'-prefixed
# metadata header followed by the date-sorted measurements is written next to it.
with open("AirNow_payloads.txt") as f:
    f.readline() # skip header
    for line in f:
        line = line.strip()
        if not line:
            continue # ignore blank lines (e.g. an empty last line)

        # Strip every field: the last one would otherwise keep the row's
        # trailing newline and end up in file names and log messages.
        fields = [field.strip() for field in line.split(",")]
        if len(fields) != 7:
            # Report the raw row; the per-request variables are not available
            # (or are stale from the previous row) when parsing fails.
            print(f"[error]: UNABLE TO PARSE payload line (expected 7 comma-separated fields): {line!r}")
            continue
        param, bdate, edate, state, county, site, fname = fields
        request_desc = f"(start={bdate}, end={edate}, state={state}, county={county}, site={site})"

        try:
            keys = {
                'email': email,   # for access to the api
                'key': pwd,       # for access to the api
                'param': param,   # parameter code, e.g. 88500 ("PM2.5 Total Atmospheric") or 88101 ("PM2.5 Local Conditions")
                'bdate': bdate,   # begin date, YYYYMMDD
                'edate': edate,   # end date, YYYYMMDD
                'state': state,   # state code, e.g. 08
                'county': county, # county code, e.g. 103
                'site': site,     # site number, e.g. 0007
            }

            print(f"[info]: START processing {request_desc} > {fname}")
            # (connect, read) timeout so a stalled connection cannot hang the
            # whole run; the read limit is generous because a full year of
            # samples may take a while to be served.
            query_AirNow = requests.get(url, params=keys, timeout=(10, 300)).json()

            # Look the payload up before touching the disk so a response
            # without 'Data' does not leave an empty file behind.
            raw_data = query_AirNow['Data']

            json_path = Path("../data") / fname
            # with_suffix() always yields a path different from json_path's
            # ".json" name, so the CSV can never overwrite the JSON dump.
            csv_path = json_path.with_suffix(".csv")
            json_path.parent.mkdir(parents=True, exist_ok=True) # mk folder if necessary

            with open(json_path, "w") as fo:
                json.dump(raw_data, fo)

            data_extract = pd.DataFrame(raw_data)

            if not data_extract.empty:
                # Label for csv file: site metadata taken from the first
                # record, plus the requested date range.
                metadata = ['site_number','latitude', 'longitude', 'method_type', 'method','state', 'county']
                metadata_d = data_extract[metadata].iloc[0].to_dict()
                metadata_d['date_start'] = bdate
                metadata_d['date_end'] = edate

                with open(csv_path, "w") as fo:
                    fo.write(
                        '\n'.join([f'## {k.upper()} : {v}' for k, v in metadata_d.items()])
                    )
                    fo.write('\n')

                output_data = pd.DataFrame({
                    'date': pd.to_datetime(data_extract['date_gmt'] + ' ' + data_extract['time_gmt']),
                    'sample_measurement': data_extract['sample_measurement'],
                })

                # Append the table below the metadata header written above.
                output_data.sort_values(by='date').to_csv(csv_path, index=False, mode='a')

                print(f"[info]: DONE processing {request_desc} > CREATED {fname}")
            else:
                print(f"[warn]: UNABLE TO PROCESS {request_desc} > NOT CREATED {fname}")
        except Exception as err:
            # Keep going with the next row, but say what went wrong.
            # Messages of request errors can embed the full URL, including
            # the e-mail and key query parameters, so only the exception
            # type is shown for those to keep credentials out of the output.
            if isinstance(err, requests.RequestException):
                detail = type(err).__name__
            else:
                detail = f"{type(err).__name__}: {err}"
            print(f"[error]: UNABLE TO PROCESS {request_desc} > NOT CREATED {fname} ({detail})")

        # Brief pause between consecutive requests (presumably to stay within
        # the API's rate limits -- confirm against the service's terms).
        time.sleep(2)

[info]: START processing (start=20170101, end=20171231, state=08, county=103, site=0007) > Western_CO/Piceance_Basin/RioBasin_2017payload.json

[warn]: UNABLE TO PROCESS (start=20170101, end=20171231, state=08, county=103, site=0007) > NOT CREATED Western_CO/Piceance_Basin/RioBasin_2017payload.json

[info]: START processing (start=20180101, end=20181231, state=08, county=103, site=0007) > Western_CO/Piceance_Basin/RioBasin_2018payload.json

[warn]: UNABLE TO PROCESS (start=20180101, end=20181231, state=08, county=103, site=0007) > NOT CREATED Western_CO/Piceance_Basin/RioBasin_2018payload.json

[info]: START processing (start=20190101, end=20191231, state=08, county=103, site=0007) > Western_CO/Piceance_Basin/RioBasin_2019payload.json

[warn]: UNABLE TO PROCESS (start=20190101, end=20191231, state=08, county=103, site=0007) > NOT CREATED Western_CO/Piceance_Basin/RioBasin_2019payload.json

[info]: START processing (start=20200101, end=20201231, state=08, county=103, site=0007) > Wester