# Collect archived AirNow data

Use the EPA AQS Data API (`https://aqs.epa.gov/data/api`) to download archived AirNow data from selected stations.

## Load Python packages

In [1]:
import numpy as np
import pandas as pd
from pandas.tseries.offsets import DateOffset
import matplotlib.pyplot as plt
import datetime
from pathlib import Path # System agnostic paths

import requests # for url requests
import json # for json reading/writing
import time # for epoch timestamp
import csv # for writing csv files

# NOTE: you must sign up for your own EPA API key and store your credentials
# in ../api_key.txt as two comma-separated values:
#
#     <email>,<key>
#
# They are sent as the `email` and `key` request parameters further down.
# Keeping them in a separate file avoids hard-coding secrets in the notebook.

with open("../api_key.txt") as f:
    # Strip each field: editors usually end the file with a newline and people
    # often put a space after the comma; either would corrupt the credentials
    # once they are URL-encoded into the request.
    # A file that does not hold exactly two fields still raises ValueError.
    email, pwd = (field.strip() for field in f.read().split(","))

In [2]:
# Endpoint for sample-level data of a single site; requests appends the query
# parameters built below.
url = 'https://aqs.epa.gov/data/api/sampleData/bySite?'

# Upper bound (seconds) on waiting for the server, so a stalled connection
# cannot hang the notebook forever. Kept generous on purpose.
REQUEST_TIMEOUT_S = 300

# payloads.txt: one header line, then one comma-separated request per line:
#   param,bdate,edate,state,county,site,fname
with open("payloads.txt") as f:
    f.readline() # skip header
    for line in f:
        if not line.strip():
            continue # tolerate blank lines (e.g. a trailing newline at EOF)

        # Strip every field: the last one (fname) otherwise keeps the line's
        # trailing newline, and padded values would be sent verbatim.
        param, bdate, edate, state, county, site, fname = (
            field.strip() for field in line.split(",")
        )

        keys = {
            'email': email,   # for access to the api
            'key': pwd,       # for access to the api
            'param': param,   # e.g. 88500 ("PM2.5 Total Atmospheric" for CU Athens) or 88101 ("PM2.5 Local Conditions" for Longmont)
            'bdate': bdate,   # begin date (modify as needed, YYYYMMDD)
            'edate': edate,   # end date (modify as needed, YYYYMMDD)
            'state': state,   # e.g. Colorado
            'county': county, # e.g. Boulder County
            'site': site,     # e.g. 1001 (for CU Athens) or 0003 (for Longmont)
        }

        # NOTE: query_AirNow is overwritten on every iteration, so after the
        # loop it holds the response for the LAST payload line only.
        query_AirNow = requests.get(url, params=keys, timeout=REQUEST_TIMEOUT_S)
        # Fail here, with the HTTP status, instead of later when the body
        # turns out not to contain the expected data.
        query_AirNow.raise_for_status()

        # Alternative kept for reference: load a previously archived response
        # and/or dump each payload to its own file, pausing between requests.
        #query_AirNow = requests.get("https://raw.githubusercontent.com/MichelleSanti/CO_AN_PA_CALIBRATION/main/data/202008_pm25_0001.json").json()
        #json.dump(query_AirNow['Data'], open(f"../data/{fname}", "w"))
        #time.sleep(2)

In [38]:
# Decode the JSON body of the response left over from the download loop
# (that is the response for the last payload line).
response_list = query_AirNow.json()
# response_list   # uncomment to inspect the raw decoded payload

In [39]:
# Pull the entry stored under 'Data' (None if the key is absent, which gives
# an empty frame) and tabulate it.
records = response_list.get('Data')
data_extract = pd.DataFrame(records)
data_extract

Unnamed: 0,state_code,county_code,site_number,parameter_code,poc,latitude,longitude,datum,parameter,date_local,...,detection_limit,uncertainty,qualifier,method_type,method,method_code,state,county,date_of_last_change,cbsa_code
0,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-08-01,...,0.06,,AN - Machine Malfunction.,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2020-11-20,14500
1,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-08-01,...,0.06,,AN - Machine Malfunction.,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2020-11-20,14500
2,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-08-01,...,0.06,,AN - Machine Malfunction.,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2020-11-20,14500
3,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-08-01,...,0.06,,AN - Machine Malfunction.,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2020-11-20,14500
4,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-08-01,...,0.06,,AN - Machine Malfunction.,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2020-11-20,14500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2323,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-11-05,...,0.06,,,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2021-02-18,14500
2324,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-11-05,...,0.06,,,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2021-02-18,14500
2325,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-11-05,...,0.06,,,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2021-02-18,14500
2326,08,013,1001,88500,3,40.012969,-105.267212,WGS84,PM2.5 Total Atmospheric,2020-11-05,...,0.06,,,Non-FRM,PM2.5 VSCC - FDMS-Gravimetric,761,Colorado,Boulder,2021-02-18,14500


In [40]:
# Shorter working name. This is an alias of data_extract, not a copy.
data = data_extract
# data = data_extract[::-1]   # alternative: same rows in reversed order

In [41]:
# Show the column labels available in the frame.
data.columns

Index(['state_code', 'county_code', 'site_number', 'parameter_code', 'poc',
       'latitude', 'longitude', 'datum', 'parameter', 'date_local',
       'time_local', 'date_gmt', 'time_gmt', 'sample_measurement',
       'units_of_measure', 'units_of_measure_code', 'sample_duration',
       'sample_duration_code', 'sample_frequency', 'detection_limit',
       'uncertainty', 'qualifier', 'method_type', 'method', 'method_code',
       'state', 'county', 'date_of_last_change', 'cbsa_code'],
      dtype='object')

In [44]:
#Use pandas to convert to datetime format
extracted_date = pd.to_datetime(data['date_gmt'] + ' ' +data['time_gmt'])
extracted_pm25 = data['sample_measurement']

In [45]:
# One-column frame holding the measurements, re-indexed by the parsed
# GMT timestamps instead of the original row numbers.
data_columns = extracted_pm25.to_frame()
data_columns.index = extracted_date
data_columns

Unnamed: 0,sample_measurement
2020-08-01 07:00:00,
2020-08-01 08:00:00,
2020-08-01 10:00:00,
2020-08-01 13:00:00,
2020-08-01 14:00:00,
...,...
2020-11-06 02:00:00,3.7
2020-11-06 03:00:00,2.9
2020-11-06 04:00:00,2.7
2020-11-06 05:00:00,1.8


In [49]:
# Write the time series next to the notebook; the timestamp index becomes the
# first CSV column, labelled Date_Time.
output_csv = 'BoulderAN2020.csv'
data_columns.to_csv(output_csv, index_label='Date_Time')