# State Emissions Data

## Obtain State emissions data through the EPA API. 

#### Load packages and import the income data set, which contains the GeoFips codes used to drive the request loop

In [1]:
# load necessary packages
import getpass
import json
import time
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests

In [2]:
# Load the income table; every GeoFips value is passed through str() so the
# codes are handled as text rather than numbers.
income = pd.read_csv(
    filepath_or_buffer='Data/Income.csv',
    converters={'GeoFips': str},
)

The API accepts only one state code per HTTP request, so each state must be queried individually. The function below extracts the state code from a GeoFips value and returns it as a two-digit string.

In [3]:
# Get State Code
def state_code(x):
    """Return the two-digit state prefix of a GeoFips code.

    Parameters
    ----------
    x : str or int
        A GeoFips code. Five-character codes already carry a two-digit state
        prefix; four-character codes are treated as having lost their
        leading zero.

    Returns
    -------
    str or None
        The two-character state code, or None when the code is not four or
        five characters long after surrounding whitespace is removed.
    """
    # Coerce and strip first so stray whitespace or an integer-typed value
    # cannot shift the length test and produce a wrong prefix.
    code = str(x).strip()
    if len(code) == 4:
        return "0" + code[0]
    if len(code) == 5:
        return code[:2]
    # Unexpected length: signal "no state code" explicitly.
    return None
# Derive the state code for every row from its GeoFips value
income['StateCode'] = income['GeoFips'].map(state_code)

In [4]:
# List the distinct state codes found in the income table
income['StateCode'].unique()

array(['01', '02', '04', '05', '06', '08', '09', '10', '11', '12', '13',
       '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25',
       '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36',
       '37', '38', '39', '40', '41', '42', '44', '45', '46', '47', '48',
       '49', '50', '51', '53', '54', '55', '56'], dtype=object)

Build the HTTP request URLs in a loop (one URL per state code)

In [None]:
# Build one annualData/byState request URL per unique state code.
# The credentials are prompted for so they are never stored in the notebook source.
email = getpass.getpass('Email:')
api_key = getpass.getpass('API:')
bdate = "20190101"
edate = "20191231"

# Everything except the trailing state code is identical for every request, so
# the endpoint is assembled once. quote() percent-encodes characters such as
# '+' or '&' that would otherwise corrupt the query string; '@' is left as-is
# so ordinary email addresses produce the same URL as before.
coord_API_endpoint = (
    "https://aqs.epa.gov/data/api/annualData/byState"
    "?email=%s&key=%s&param=88101,86101,85101&bdate=%s&edate=%s&state="
    % (quote(email, safe='@'), quote(api_key, safe=''), bdate, edate)
)

# NOTE: every URL embeds the email and API key -- do not print or save them unredacted.
state_air_pollution = [
    coord_API_endpoint + location
    for location in income['StateCode'].unique()
]
length = len(state_air_pollution)

Email:········


In [7]:
# Count the request URLs and keep the count for the download loop
state_length = len(state_air_pollution)
print(state_length)

51


In [8]:
# View output (HTTP request) with the credentials masked, so the email address
# and API key are not written into the saved notebook output.
endpoint, separator, query_string = state_air_pollution[1].partition('?')
masked_pairs = []
for pair in query_string.split('&'):
    field = pair.split('=', 1)[0]
    masked_pairs.append(field + '=REDACTED' if field in ('email', 'key') else pair)
print(endpoint + separator + '&'.join(masked_pairs))

https://aqs.epa.gov/data/api/annualData/byState?email=REDACTED&key=REDACTED&param=88101,86101,85101&bdate=20190101&edate=20191231&state=02


The next step sends the request and parses the response into a JSON object (a Python dictionary), which can later be converted into a pandas DataFrame.

In [None]:
# View a single call: fetch the second URL in the list as a smoke test
# before looping over every state.
single_response = requests.get(state_air_pollution[1], timeout=60)  # timeout so a stalled connection cannot hang the kernel
if not single_response.ok:
    # Report only the status code: the request URL embeds the API credentials.
    raise RuntimeError("AQS request failed with HTTP status %s" % single_response.status_code)
state_air_pollution_data = single_response.json()

In [10]:
# Preview the response: the header plus the first record only, instead of every row.
# The header's 'url' entry is masked because it repeats the email and API key.
header_preview = [
    {key: ('REDACTED' if key == 'url' else value) for key, value in entry.items()}
    for entry in state_air_pollution_data['Header']
]
print({'Header': header_preview, 'Data': state_air_pollution_data['Data'][:1]})

{'Header': [{'status': 'Success', 'request_time': '2022-01-01T13:23:29-05:00', 'url': 'https://aqs.epa.gov/data/api/annualData/byState?email=REDACTED&key=REDACTED&param=88101,86101,85101&bdate=20190101&edate=20191231&state=02', 'rows': 172}], 'Data': [{'state_code': '02', 'county_code': '170', 'site_number': '0008', 'parameter_code': '88101', 'poc': 1, 'latitude': 61.534163, 'longitude': -149.031655, 'datum': 'WGS84', 'parameter': 'PM2.5 - Local Conditions', 'sample_duration_code': '7', 'sample_duration': '24 HOUR', 'pollutant_standard': 'PM25 24-hour 2006', 'metric_used': 'Daily Mean', 'method': 'R & P Model 2000 PM-2.5 Air Sampler w/VSCC - Gravimetric', 'year': 2019, 'units_of_measure': 'Micrograms/cubic meter (LC)', 'event_type': 'No Events', 'observation_count': 60, 'observation_percent': 98.0, 'validity_indicator': 'Y', 'valid_day_count': 60, 'required_day_count': 61, 'exceptional_data_count': 0, 'null_observation_count': 2, 'primary_exceedance_count': 0, 'secondary_

In [11]:
# Send every request in turn and parse each response body as JSON.
REQUEST_TIMEOUT_SECONDS = 60  # fail instead of hanging forever on a stalled connection
# NOTE(review): the AQS API usage guidelines ask for a pause between
# consecutive requests -- confirm the currently required interval.
REQUEST_PAUSE_SECONDS = 5

state_air_pollution_json = []
with requests.Session() as session:  # one session, so the connection is reused across requests
    for request_number, url in enumerate(state_air_pollution):
        if request_number:
            time.sleep(REQUEST_PAUSE_SECONDS)
        response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        if not response.ok:
            # Name only the state code (the text after the final '='): the
            # full URL embeds the API credentials and must not reach the output.
            raise RuntimeError(
                "AQS request for state %s failed with HTTP status %s"
                % (url.rsplit('=', 1)[-1], response.status_code)
            )
        state_air_pollution_json.append(response.json())

In [12]:
# Number of JSON payloads collected (the download loop appends one per request URL)
print(len(state_air_pollution_json))

51


Convert the JSON objects into a single pandas DataFrame and export it as a CSV file to the Data folder, where it will later be used for analysis.

In [13]:
# Flatten the records under each payload's 'Data' key into one DataFrame
state_data = pd.json_normalize(
    state_air_pollution_json,
    record_path='Data',
)

In [14]:
# (rows, columns) of the combined DataFrame
state_data.shape

(10926, 56)

In [15]:
# Write the combined table to the Data folder as CSV, leaving out the row index
state_data.to_csv(
    path_or_buf="Data/Emissions.csv",
    index=False,
)