In [1]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
import datetime
from pathlib import Path # System agnostic paths

import requests # for url requests
import json # for json reading/writing
import time # for epoch timestamp
import csv # for writing csv files

import os

'''
    NOTE: you must sign up for your own EPA Now key and 
    put it along with the password in the file api_key.txt
'''

with open("../api_key.txt") as f:
    email, pwd = f.read().split(",")

In [2]:
url = 'https://aqs.epa.gov/data/api/sampleData/bySite?'

with open("AirNow_payloadstest.txt") as f:
    f.readline() # skip header
    for lines in f:
        param, bdate, edate, state, county, site, fname = lines.split(",")
        
        keys = {
        'email': email, #for access to the api
        'key': pwd, #for access to the api
        'param': param, #88500 ("PM2.5 Total Atmospheric" for CU Athens) or 88101 ("PM2.5 Local Conditions" for Longmont)
        'bdate': bdate, #begin date (modify as needed, YYYYMMDD)
        'edate': edate, #end date (modify as needed, YYYYMMDD)
        'state': state, #Colorado
        'county': county, #Boulder County
        'site': site, #1001 (for CU Athens) or 0003 (for Longmont)
        }
    
        print(f"[info]: START processing (start={bdate}, end={edate}, state={state}, county={county}, site={site}) > {fname}")
        query_AirNow = requests.get(url, keys).json()
        
        if len(fname.split('/')) > 1: # mk folder if necessary
            os.makedirs(f"../data/{'/'.join(fname.split('/')[:-1])}", exist_ok=True)
        
        json.dump(query_AirNow['Data'], open(f"../data/{fname}".strip(), "w")) 
        
        data_extract = pd.DataFrame(query_AirNow['Data'])

        if not data_extract.empty:
            output_data = pd.DataFrame(columns=['date', 'sample_measurement'])        
            output_data['date'] = pd.to_datetime(data_extract['date_gmt'] + ' ' + data_extract['time_gmt'])
            output_data['sample_measurement'] = data_extract['sample_measurement']
            
            #Label for csv file
            metadata = ['site_number','latitude', 'longitude', 'method_type', 'method','state', 'county']
            metadata_d = data_extract[metadata].iloc[0].to_dict()
            metadata_d['date_start'] = bdate
            metadata_d['date_end'] = edate
            
            for k, v in metadata_d.items():
                print(f'## {k.upper()} : {v}')
                
            output_data.sort_values(by='date').to_csv(f"../data/{fname}".strip().replace(".json", ".csv"),index=False)
        
            print(f"[info]: DONE processing (start={bdate}, end={edate}, state={state}, county={county}, site={site}) > CREATED {fname}")
        else:
            print(f"[warn]: UNABLE TO PROCESS (start={bdate}, end={edate}, state={state}, county={county}, site={site}) > NOT CREATED {fname}")
            
        time.sleep(2)

[info]: START processing (start=20170101, end=20171231, state=08, county=005, site=0005) > Front_Range/FRLittletonACC/testACC_17payload.json
## SITE_NUMBER : 0005
## LATITUDE : 39.604399
## LONGITUDE : -105.019526
## METHOD_TYPE : FRM
## METHOD : R & P Model 2025 PM-2.5 Sequential Air Sampler w/VSCC - Gravimetric
## STATE : Colorado
## COUNTY : Arapahoe
## DATE_START : 20170101
## DATE_END : 20171231
[info]: DONE processing (start=20170101, end=20171231, state=08, county=005, site=0005) > CREATED Front_Range/FRLittletonACC/testACC_17payload.json


In [5]:
metadata = [ 
            'site_number', 
            'latitude', 'longitude', 
            'method_type', 'method', 'method_code',
            'state', 'county']

In [6]:
metadata_d = data_extract[metadata].iloc[0].to_dict()

In [7]:
metadata_d['date_start'] = bdate
metadata_d['date_end'] = bdate

In [8]:
sample_fragment = """date,sample_measurement
2017-01-01 07:00:00,20.5
2017-01-01 07:00:00,18.2
2017-01-01 08:00:00,13.6
"""

In [9]:
metadata_d

{'site_number': '0007',
 'latitude': 39.8723,
 'longitude': -108.2183,
 'method_type': 'Non-FRM',
 'method': 'PM2.5 VSCC w/No Correction Factor - TEOM Gravimetric 30 deg C',
 'method_code': '715',
 'state': 'Colorado',
 'county': 'Rio Blanco',
 'date_start': '20220101',
 'date_end': '20220101'}

In [10]:
for k, v in metadata_d.items():
    print(f'## {k.upper()} : {v}')
print(sample_fragment)

## SITE_NUMBER : 0007
## LATITUDE : 39.8723
## LONGITUDE : -108.2183
## METHOD_TYPE : Non-FRM
## METHOD : PM2.5 VSCC w/No Correction Factor - TEOM Gravimetric 30 deg C
## METHOD_CODE : 715
## STATE : Colorado
## COUNTY : Rio Blanco
## DATE_START : 20220101
## DATE_END : 20220101
date,sample_measurement
2017-01-01 07:00:00,20.5
2017-01-01 07:00:00,18.2
2017-01-01 08:00:00,13.6

