# Collect archived Airnow Data

Use the EPA AQS Data API (aqs.epa.gov) to download Airnow sample data from selected monitoring stations.

## Load Python packages

In [1]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
import datetime
from pathlib import Path # System agnostic paths

import requests # for url requests
import json # for json reading/writing
import time # for epoch timestamp
import csv # for writing csv files

# NOTE: you must sign up for your own API key for the EPA data service
# queried below (aqs.epa.gov) and store your credentials in ../api_key.txt
# as a single comma-separated line:  <email>,<key>

with open("../api_key.txt") as f:
    # Strip each field: the file normally ends with a newline, which would
    # otherwise become part of the key and be sent along with every request.
    email, pwd = (field.strip() for field in f.read().split(","))

In [6]:
# Endpoint returning sample-level data for a single monitoring site.
url = 'https://aqs.epa.gov/data/api/sampleData/bySite?'

# payloads.txt holds one request per line (after a header row):
#   param,bdate,edate,state,county,site,fname
# For every row the raw records are saved to ../data/<fname> and a
# time-sorted (date, sample_measurement) table is saved next to it as .csv.
with open("payloads.txt") as f:
    f.readline() # skip header
    for lines in f:
        if not lines.strip():
            continue # tolerate blank / trailing lines instead of failing the unpack

        # Strip every field so stray spaces or the line's newline never
        # reach the query parameters or the output file name.
        param, bdate, edate, state, county, site, fname = (
            field.strip() for field in lines.split(",")
        )

        keys = {
        'email': email, #for access to the api
        'key': pwd, #for access to the api
        'param': param, #88500 ("PM2.5 Total Atmospheric" for CU Athens) or 88101 ("PM2.5 Local Conditions" for Longmont)
        'bdate': bdate, #begin date (modify as needed, YYYYMMDD)
        'edate': edate, #end date (modify as needed, YYYYMMDD)
        'state': state, #Colorado
        'county': county, #Boulder County
        'site': site, #1001 (for CU Athens) or 0003 (for Longmont)
        }

        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status surfaces HTTP errors before the body is parsed.
        response = requests.get(url, params=keys, timeout=60)
        response.raise_for_status()
        query_AirNow = response.json()

        # Keep the raw records; the context manager guarantees the file is
        # flushed and closed before the next request starts.
        with open(f"../data/{fname}", "w") as json_file:
            json.dump(query_AirNow['Data'], json_file)

        data_extract = pd.DataFrame(query_AirNow['Data'])

        if data_extract.empty:
            # No records for this request: still emit a header-only CSV rather
            # than failing on the missing date/time columns.
            print(f"No data returned for {fname}")
            output_data = pd.DataFrame(columns=['date', 'sample_measurement'])
        else:
            # Combine the GMT date and time strings into a single timestamp.
            output_data = pd.DataFrame({
                'date': pd.to_datetime(data_extract['date_gmt'] + ' ' + data_extract['time_gmt']),
                'sample_measurement': data_extract['sample_measurement'],
            })

        output_data.sort_values(by='date').to_csv(
            f"../data/{fname}".replace(".json", ".csv"), index=False
        )

        time.sleep(2) # pause between consecutive API requests

In [5]:
# Preview the table built for the last payload row, ordered by timestamp.
output_data.sort_values('date')

Unnamed: 0,date,sample_measurement
66,2020-08-01 07:00:00,
67,2020-08-01 08:00:00,
0,2020-08-01 09:00:00,-1.6
68,2020-08-01 10:00:00,
1,2020-08-01 11:00:00,-1.6
...,...,...
2323,2020-11-06 02:00:00,3.7
2324,2020-11-06 03:00:00,2.9
2325,2020-11-06 04:00:00,2.7
2326,2020-11-06 05:00:00,1.8
