In [None]:
import requests
import json
import pandas as pd
import numpy as np
import datetime

In [10]:
# Loading data from EPA API (https://aqs.epa.gov/aqsweb/documents/data_api.html#signup)

# AQS codes: https://aqs.epa.gov/aqsweb/documents/codetables/methods_all.html
# ozone code = 44201
# CO FIPS = 08
# Boulder FIPS = 013
# request a key - https://aqs.epa.gov/data/api/signup?email=myemail@example.com
# new key may need to be requested even if you already have one


In [11]:
epa_oz_17_resp = requests.get("https://aqs.epa.gov/data/api/metaData/fieldsByService?email=ryer7052@colorado.edu&key=khakicrane62&service=sampleData").json()

In [4]:
epa_oz_18_resp = requests.get("https://aqs.epa.gov/data/api/sampleData/byState?email=ryer7052@colorado.edu&key=khakicrane62&param=44201&bdate=20180101&edate=20181231&state=08&county=013").json()

In [None]:
epa_oz_19_resp = requests.get("https://aqs.epa.gov/data/api/sampleData/byState?email=ryer7052@colorado.edu&key=khakicrane62&param=44201&bdate=20190101&edate=20191231&state=08").json()

In [None]:
epa_oz_20_resp = requests.get("https://aqs.epa.gov/data/api/sampleData/byState?email=ryer7052@colorado.edu&key=khakicrane62&param=44201&bdate=20200101&edate=20201231&state=08").json()

In [None]:
epa_oz_21_resp = requests.get("https://aqs.epa.gov/data/api/sampleData/byState?email=ryer7052@colorado.edu&key=khakicrane62&param=44201&bdate=20210101&edate=20210901&state=08").json()

In [12]:
epa_oz_17_resp

{'Header': [{'status': 'Failed',
   'request_time': '2023-02-02T14:45:22.604-05:00',
   'url': 'https://aqs.epa.gov/data/api/metaData/fieldsByService?email=ryer7052@colorado.edu&key=khakicrane62&service=sampleData',
   'error': ['Email and/or key are invalid.']}]}

In [9]:
# initializing lists to create dataframes for each json request
oz_17_list = []
oz_18_list = []
oz_19_list = []
oz_20_list = []
oz_21_list = []


# function to create dataframes based on json data
def json_to_df(lists,json_data):
    for row in json_data["Data"]:
        lists.append(row)
        
    df = pd.DataFrame(data = lists)
    return df


# using function to create lists
oz_17_df = json_to_df(oz_17_list,epa_oz_17_resp)
#oz_20_df

oz_18_df = json_to_df(oz_18_list,epa_oz_18_resp)
#oz_18_df

oz_19_df = json_to_df(oz_19_list,epa_oz_19_resp)
#oz_19_df
    
oz_20_df = json_to_df(oz_20_list,epa_oz_20_resp)
#oz_20_df

oz_21_df = json_to_df(oz_21_list,epa_oz_21_resp)
#oz_20_df

# join dataframes
oz_17_21 = pd.concat([oz_17_df,oz_18_df,oz_19_df,oz_20_df,oz_21_df])


KeyError: 'Data'

In [13]:
# join date and time columns, turn into datetime object
cols = ["date_local","time_local"]
oz_17_21["date_time"] = oz_17_21[cols].apply(lambda x: '-'.join(x.values.astype(str)), axis="columns")
oz_17_21["date_time"] = pd.to_datetime(oz_17_21["date_time"], infer_datetime_format = True)

In [14]:
# remove non Denver metro counties
# Denver metro counties: Adams, Arapahoe, Boulder, Broomfield, Denver, Douglas, Jefferson
den_metro_counties = ["Adams","Arapahoe","Boulder","Broomfield","Denver","Douglas","Jefferson"]

oz_17_21 = oz_17_21[oz_17_21["county"].isin(den_metro_counties)]


In [15]:
oz_17_21.to_csv("hrly_o3_17to21.csv")

In [10]:
# remove columns that aren't useful for the analysis
oz_18_20 = oz_18_20.drop(columns=["parameter_code","poc","parameter","date_local","time_local","date_gmt",
                                  "time_gmt","units_of_measure","units_of_measure_code","sample_duration",
                                  "sample_duration_code","sample_frequency","detection_limit","method",
                                  "method_type","method_code","date_of_last_change","cbsa_code"])


In [11]:
np.unique(oz_18_20.site_number)

array(['0002', '0004', '0005', '0006', '0011', '0013', '0014', '0026',
       '3001'], dtype=object)

In [31]:
# calculate max 8 hour daily average of ozone
eight_means = oz_18_20.set_index(["date_time"])\
.sort_index()\
.groupby("site_number")\
.rolling(window=8, min_period=6)\
.agg({"sample_measurement": ["mean"]})\
.reset_index()


# calculate mda8 for ozone
mda8_frame = eight_means.set_index(["date_time"])\
.sort_index()\
.groupby("site_number")\
.resample("D")\
.max()\
.rename(columns={"site_number":"sites","mean":"mda8"})\
.reset_index(level="site_number")\
.drop(columns="sites")

mda8_frame

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,site_number,sample_measurement
Unnamed: 0_level_1,Unnamed: 1_level_1,mda8
date_time,Unnamed: 1_level_2,Unnamed: 2_level_2
2018-01-01,0002,0.012875
2018-01-02,0002,0.037875
2018-01-03,0002,0.034875
2018-01-04,0002,0.037750
2018-01-05,0002,0.021375
...,...,...
2020-12-27,3001,0.039625
2020-12-28,3001,0.026375
2020-12-29,3001,0.019875
2020-12-30,3001,0.016375


In [34]:
# calculate daily mean and max
daily_max = oz_18_20.set_index("date_time")\
.sort_index()\
.groupby("site_number")\
.resample("D")\
.agg({"sample_measurement":["mean","max"]})\
.rename(columns={"mean":"daily_avg","max":"daily_max"})\
.reset_index(level="site_number")

daily_max

Unnamed: 0_level_0,site_number,sample_measurement,sample_measurement
Unnamed: 0_level_1,Unnamed: 1_level_1,daily_avg,daily_max
date_time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2018-01-01,0002,0.004000,0.016
2018-01-02,0002,0.016979,0.045
2018-01-03,0002,0.020479,0.046
2018-01-04,0002,0.018292,0.045
2018-01-05,0002,0.012354,0.039
...,...,...,...
2020-12-27,3001,0.034625,0.041
2020-12-28,3001,0.012250,0.027
2020-12-29,3001,0.012583,0.026
2020-12-30,3001,0.007826,0.029


In [36]:
# join two dfs together
oz = daily_max.merge(mda8_frame, how="left", on=["date_time","site_number"])
oz

Unnamed: 0_level_0,site_number,sample_measurement,sample_measurement,sample_measurement
Unnamed: 0_level_1,Unnamed: 1_level_1,daily_avg,daily_max,mda8
date_time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2018-01-01,0002,0.004000,0.016,0.012875
2018-01-02,0002,0.016979,0.045,0.037875
2018-01-03,0002,0.020479,0.046,0.034875
2018-01-04,0002,0.018292,0.045,0.037750
2018-01-05,0002,0.012354,0.039,0.021375
...,...,...,...,...
2020-12-27,3001,0.034625,0.041,0.039625
2020-12-28,3001,0.012250,0.027,0.026375
2020-12-29,3001,0.012583,0.026,0.019875
2020-12-30,3001,0.007826,0.029,0.016375


In [37]:
oz.to_csv("hrly_o3_17to21.csv")