# Collect archived Airnow Data

Use the EPA AQS Data API (`sampleData/bySite` endpoint) to download archived air-quality sample data from selected monitoring stations.

## Load Python packages

In [2]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
import datetime
from pathlib import Path # System agnostic paths

import requests # for url requests
import json # for json reading/writing
import time # for epoch timestamp
import csv # for writing csv files

import os

# NOTE: you must sign up for your own EPA API key (the service queried
# here is aqs.epa.gov) and put your account email and the key, separated
# by a comma, in the file ../api_key.txt -- i.e. a single line of the
# form:  email,key

# Read the API credentials. Whitespace around each field (in particular
# the trailing newline most editors append to the file) is stripped so it
# is not sent to the API as part of the email or key.
# Raises ValueError if the file does not contain exactly two
# comma-separated fields.
with open("../api_key.txt") as f:
    email, pwd = (field.strip() for field in f.read().split(","))

In [None]:
# Endpoint that returns the raw sample data for a single monitoring site.
url = 'https://aqs.epa.gov/data/api/sampleData/bySite?'

# payloads.txt is a comma-separated file: one header line, then one request
# per line with the fields  param,bdate,edate,state,county,site,fname
# where fname is the output path (relative to ../data) of the raw JSON dump.
with open("payloads.txt") as f:
    f.readline() # skip header
    for line in f:
        if not line.strip(): # tolerate blank lines (e.g. at the end of the file)
            continue

        # Strip every field: the last one (fname) carries the line's trailing
        # newline, which would otherwise end up in the log messages, and the
        # others may have spaces around the commas.
        # Raises ValueError if the line does not have exactly seven fields.
        param, bdate, edate, state, county, site, fname = (
            field.strip() for field in line.split(",")
        )

        keys = {
        'email': email, #for access to the api
        'key': pwd, #for access to the api
        'param': param, #88500 ("PM2.5 Total Atmospheric" for CU Athens) or 88101 ("PM2.5 Local Conditions" for Longmont)
        'bdate': bdate, #begin date (modify as needed, YYYYMMDD)
        'edate': edate, #end date (modify as needed, YYYYMMDD)
        'state': state, #Colorado
        'county': county, #Boulder County
        'site': site, #1001 (for CU Athens) or 0003 (for Longmont)
        }

        print(f"[info]: START processing (start={bdate}, end={edate}, state={state}, county={county}, site={site}) > {fname}")
        # The dict is sent as the URL query string; the response body is JSON.
        query_AirNow = requests.get(url, keys).json()

        # Create the output folder (including any sub-folders named in fname).
        json_path = Path("../data") / fname
        json_path.parent.mkdir(parents=True, exist_ok=True)

        # Keep the raw records exactly as returned. Note that this file is
        # written even when the query returned no records.
        with open(json_path, "w") as json_file:
            json.dump(query_AirNow['Data'], json_file)

        data_extract = pd.DataFrame(query_AirNow['Data'])

        if not data_extract.empty:
            # Reduce the records to a GMT timestamp plus the measured value.
            output_data = pd.DataFrame({
                'date': pd.to_datetime(data_extract['date_gmt'] + ' ' + data_extract['time_gmt']),
                'sample_measurement': data_extract['sample_measurement'],
            })
            # The CSV is stored next to the JSON dump under the same name
            # with a .csv extension.
            output_data.sort_values(by='date').to_csv(json_path.with_suffix(".csv"), index=False)

            print(f"[info]: DONE processing (start={bdate}, end={edate}, state={state}, county={county}, site={site}) > CREATED {fname}")
        else:
            # An empty frame has no 'date_gmt' column, so no CSV is produced.
            print(f"[warn]: UNABLE TO PROCESS (start={bdate}, end={edate}, state={state}, county={county}, site={site}) > NOT CREATED {fname}")

        # Pause between consecutive requests (presumably to stay within the
        # API's rate limits -- confirm against the service's usage terms).
        time.sleep(2)

<<<<<<< local


[info]: START processing (start=20170101, end=20171231, state=08, county=029, site=0007) > Western_CO/La_Posta/Ute3_2017payload.json

[warn]: UNABLE TO PROCESS (start=20170101, end=20171231, state=08, county=029, site=0007) > NOT CREATED Western_CO/La_Posta/Ute3_2017payload.json



>>>>>>> remote
<<<<<<< local <removed>


KeyError: 'date_gmt'

>>>>>>> remote <modified: >
