In [42]:
import os
import glob
import pandas as pd

In [74]:
# Directory (relative to the current working directory) searched for the input CSV files.
path = 'input/'

In [75]:
# glob returns matches in arbitrary (filesystem-dependent) order; sorting keeps the
# row order of the concatenated frame reproducible from run to run.
files = sorted(glob.glob(os.path.join(path, '*.csv')))

In [115]:
comparison_states = ['Washington', 'California']


def state_select(file, comparison_states):
    """Read one CSV file and keep only the rows for Oregon and the comparison states.

    Parameters
    ----------
    file : path or file-like accepted by ``pd.read_csv``
        CSV with at least the columns 'State Name', 'State Code',
        'County Code' and 'Date'.
    comparison_states : list of str
        State names to keep in addition to 'Oregon'.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original row labels. 'State Code' and
        'County Code' are read as strings and 'Date' is parsed as a datetime.
    """
    wanted_states = ['Oregon'] + comparison_states
    # Reading the code columns as str keeps them as text (e.g. leading zeros survive).
    column_types = {'State Code': str, 'County Code': str}
    frame = pd.read_csv(file, dtype=column_types, parse_dates=['Date'])
    in_wanted_state = frame['State Name'].isin(wanted_states)
    return frame.loc[in_wanted_state]

In [116]:
# Load every input file, keep only the Oregon + comparison-state rows, and stack them.
# `comparison_states` is the notebook-level list; the previous `states` name only
# exists inside state_select, so it raised NameError on a fresh kernel.
# reset_index() (without drop=True) keeps each file's original row labels in an
# 'index' column of the combined frame.
df = pd.concat(
    [state_select(file, comparison_states).reset_index() for file in files]
)

In [117]:
# Most recent 'Date' present for each state in the combined frame.
df.groupby('State Name')['Date'].max()

State Name
California   2018-04-30
Oregon       2017-12-31
Washington   2018-01-31
Name: Date, dtype: datetime64[ns]

In [79]:
# to_csv does not create missing directories, so make sure the target folder exists.
os.makedirs('output', exist_ok=True)
# index=False: the row labels are not written to the file.
df.to_csv('output/df_west.csv', index=False)

# Computing AQI

$I = \frac{I_{high}-I_{low}}{C_{high}-C_{low}}(C-C_{low})+I_{low}$<sup>1</sup>

where

$I=\textrm{the Air Quality index,}\\
C=\textrm{the pollutant concentration,}\\
C_{low}=\textrm{the concentration breakpoint that is}\leq C,\\
C_{high}=\textrm{the concentration breakpoint that is}\geq C,\\
I_{low}=\textrm{the index breakpoint corresponding to } C_{low},\\
I_{high}=\textrm{the index breakpoint corresponding to } C_{high}.
$

# Measurements at SE Lafayette 8/21/2018


In [None]:
# Pollutant readings for the SE Lafayette monitor on 8/21/2018.
# NOTE(review): units are presumably those of the AQI breakpoint table (PM2.5 in
# µg/m3, O3 and NO2 in ppb); the averaging periods are not recorded here — confirm.
pm25 = 66.8 # unhealthy (inside the 55.5-150.4 PM2.5 band)
ozone = 35 # good? (would be "Good" if this is an 8-hr average in ppb: 0-54)
no2 = 11.7 # good? (would be "Good" if this is a 1-hr average in ppb: 0-53)

Using the above equation with the PM<sub>2.5</sub> reading, we get an AQI for 8/21 of about 157:

In [206]:
def aqi_linear(c, c_low, c_high, i_low, i_high):
    """Linearly interpolate an AQI value inside one breakpoint band.

    Implements I = (I_high - I_low) / (C_high - C_low) * (C - C_low) + I_low.

    Parameters
    ----------
    c : float
        Pollutant concentration.
    c_low, c_high : float
        Concentration breakpoints that bracket ``c`` (c_low <= c <= c_high).
    i_low, i_high : float
        Index breakpoints corresponding to ``c_low`` and ``c_high``.

    Returns
    -------
    float
        The AQI for concentration ``c``.
    """
    return (i_high - i_low) / (c_high - c_low) * (c - c_low) + i_low


# PM2.5 reading of 66.8 falls in the 55.5-150.4 band, which maps to AQI 151-200.
aqi_linear(66.8, c_low=55.5, c_high=150.4, i_low=151, i_high=200)

156.8345626975764

In [200]:
# Parse every HTML table on the Wikipedia AQI article into a list of DataFrames,
# using the first row of each table as its column header.
aqi_page_url = 'https://en.wikipedia.org/wiki/Air_quality_index'
df_list = pd.read_html(aqi_page_url, header=0)

In [201]:
# NOTE(review): 14 is the table's position on the page at the time it was scraped
# (the output below shows the AQI breakpoint table); it may shift if the article is edited.
df_list[14]

Unnamed: 0,O3 (ppb),O3 (ppb).1,PM2.5 (µg/m3),PM10 (µg/m3),CO (ppm),SO2 (ppb),NO2 (ppb),AQI,AQI.1
0,Clow - Chigh (avg),Clow - Chigh (avg),Clow- Chigh (avg),Clow - Chigh (avg),Clow - Chigh (avg),Clow - Chigh (avg),Clow - Chigh (avg),Ilow - Ihigh,Category
1,0-54 (8-hr),-,0.0-12.0 (24-hr),0-54 (24-hr),0.0-4.4 (8-hr),0-35 (1-hr),0-53 (1-hr),0-50,Good
2,55-70 (8-hr),-,12.1-35.4 (24-hr),55-154 (24-hr),4.5-9.4 (8-hr),36-75 (1-hr),54-100 (1-hr),51-100,Moderate
3,71-85 (8-hr),125-164 (1-hr),35.5-55.4 (24-hr),155-254 (24-hr),9.5-12.4 (8-hr),76-185 (1-hr),101-360 (1-hr),101-150,Unhealthy for Sensitive Groups
4,86-105 (8-hr),165-204 (1-hr),55.5-150.4 (24-hr),255-354 (24-hr),12.5-15.4 (8-hr),186-304 (1-hr),361-649 (1-hr),151-200,Unhealthy
5,106-200 (8-hr),205-404 (1-hr),150.5-250.4 (24-hr),355-424 (24-hr),15.5-30.4 (8-hr),305-604 (24-hr),650-1249 (1-hr),201-300,Very Unhealthy
6,-,405-504 (1-hr),250.5-350.4 (24-hr),425-504 (24-hr),30.5-40.4 (8-hr),605-804 (24-hr),1250-1649 (1-hr),301-400,Hazardous
7,-,505-604 (1-hr),350.5-500.4 (24-hr),505-604 (24-hr),40.5-50.4 (8-hr),805-1004 (24-hr),1650-2049 (1-hr),401-500,


In [202]:
# Row label 0 only repeats the sub-header ("Clow - Chigh (avg)", "Ilow - Ihigh", ...),
# so remove it and keep the actual breakpoint rows.
aqi_df = df_list[14].drop(index=0)

In [203]:
# Show the breakpoint table after the sub-header row has been dropped.
aqi_df

Unnamed: 0,O3 (ppb),O3 (ppb).1,PM2.5 (µg/m3),PM10 (µg/m3),CO (ppm),SO2 (ppb),NO2 (ppb),AQI,AQI.1
1,0-54 (8-hr),-,0.0-12.0 (24-hr),0-54 (24-hr),0.0-4.4 (8-hr),0-35 (1-hr),0-53 (1-hr),0-50,Good
2,55-70 (8-hr),-,12.1-35.4 (24-hr),55-154 (24-hr),4.5-9.4 (8-hr),36-75 (1-hr),54-100 (1-hr),51-100,Moderate
3,71-85 (8-hr),125-164 (1-hr),35.5-55.4 (24-hr),155-254 (24-hr),9.5-12.4 (8-hr),76-185 (1-hr),101-360 (1-hr),101-150,Unhealthy for Sensitive Groups
4,86-105 (8-hr),165-204 (1-hr),55.5-150.4 (24-hr),255-354 (24-hr),12.5-15.4 (8-hr),186-304 (1-hr),361-649 (1-hr),151-200,Unhealthy
5,106-200 (8-hr),205-404 (1-hr),150.5-250.4 (24-hr),355-424 (24-hr),15.5-30.4 (8-hr),305-604 (24-hr),650-1249 (1-hr),201-300,Very Unhealthy
6,-,405-504 (1-hr),250.5-350.4 (24-hr),425-504 (24-hr),30.5-40.4 (8-hr),605-804 (24-hr),1250-1649 (1-hr),301-400,Hazardous
7,-,505-604 (1-hr),350.5-500.4 (24-hr),505-604 (24-hr),40.5-50.4 (8-hr),805-1004 (24-hr),1650-2049 (1-hr),401-500,


In [204]:
# Split the "low-high" AQI range text at the first '-' into two new columns.
# `n` must be passed by keyword: positional use was deprecated and raises
# TypeError from pandas 2.0 on. The resulting 'min'/'max' values are still strings.
aqi_df[['min', 'max']] = aqi_df['AQI'].str.split('-', n=1, expand=True)

In [205]:
# Show the breakpoint table with the new 'min' and 'max' columns.
aqi_df

Unnamed: 0,O3 (ppb),O3 (ppb).1,PM2.5 (µg/m3),PM10 (µg/m3),CO (ppm),SO2 (ppb),NO2 (ppb),AQI,AQI.1,min,max
1,0-54 (8-hr),-,0.0-12.0 (24-hr),0-54 (24-hr),0.0-4.4 (8-hr),0-35 (1-hr),0-53 (1-hr),0-50,Good,0,50
2,55-70 (8-hr),-,12.1-35.4 (24-hr),55-154 (24-hr),4.5-9.4 (8-hr),36-75 (1-hr),54-100 (1-hr),51-100,Moderate,51,100
3,71-85 (8-hr),125-164 (1-hr),35.5-55.4 (24-hr),155-254 (24-hr),9.5-12.4 (8-hr),76-185 (1-hr),101-360 (1-hr),101-150,Unhealthy for Sensitive Groups,101,150
4,86-105 (8-hr),165-204 (1-hr),55.5-150.4 (24-hr),255-354 (24-hr),12.5-15.4 (8-hr),186-304 (1-hr),361-649 (1-hr),151-200,Unhealthy,151,200
5,106-200 (8-hr),205-404 (1-hr),150.5-250.4 (24-hr),355-424 (24-hr),15.5-30.4 (8-hr),305-604 (24-hr),650-1249 (1-hr),201-300,Very Unhealthy,201,300
6,-,405-504 (1-hr),250.5-350.4 (24-hr),425-504 (24-hr),30.5-40.4 (8-hr),605-804 (24-hr),1250-1649 (1-hr),301-400,Hazardous,301,400
7,-,505-604 (1-hr),350.5-500.4 (24-hr),505-604 (24-hr),40.5-50.4 (8-hr),805-1004 (24-hr),1650-2049 (1-hr),401-500,,401,500


> Oregon’s index is based on three pollutants regulated by the federal Clean Air Act: ground-level ozone, particle pollution and nitrogen dioxide.<sup>2</sup>

Ozone: O<sub>3</sub> (ppb)

Particle pollution: PM<sub>2.5</sub> (µg/m<sup>3</sup>)

Nitrogen dioxide: NO<sub>2</sub> (ppb)

# References

<sup>1</sup> https://en.wikipedia.org/wiki/Air_quality_index#Computing_the_AQI

<sup>2</sup> https://web.archive.org/web/20180822170335/https://www.oregon.gov/deq/aq/Pages/aqi.aspx