In [None]:
# Interpreting the data
# https://www.epa.gov/outdoor-air-quality-data/about-air-data-reports

In [10]:
# Dependencies
import pandas as pd

In [11]:
# Read the 2019 annual AQI-by-county CSV (path is relative to the notebook's
# working directory) into a DataFrame.
file_path = "Resources/annual_aqi_by_county_2019.csv"
aqi_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
aqi_df.head(5)

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2019,271,237,34,0,0,0,0,80,52,37,0,0,220,51,0
1,Alabama,Clay,2019,107,97,10,0,0,0,0,67,50,30,0,0,0,107,0
2,Alabama,Colbert,2019,263,252,11,0,0,0,0,61,47,37,0,0,228,35,0
3,Alabama,DeKalb,2019,361,324,37,0,0,0,0,90,51,39,0,0,331,30,0
4,Alabama,Elmore,2019,228,208,20,0,0,0,0,100,50,39,0,0,228,0,0


In [34]:
# Show every column label as a plain Python list
list(aqi_df.columns)

['State',
 'County',
 'Year',
 'Days with AQI',
 'Good Days',
 'Moderate Days',
 'Unhealthy for Sensitive Groups Days',
 'Unhealthy Days',
 'Very Unhealthy Days',
 'Hazardous Days',
 'Max AQI',
 '90th Percentile AQI',
 'Median AQI',
 'Days CO',
 'Days NO2',
 'Days Ozone',
 'Days PM2.5',
 'Days PM10']

In [36]:
# Map the original column headers (which contain spaces) to space-free
# CamelCase names.  The mapping is deliberately NOT called `dict`: binding
# that name would shadow the built-in for every later cell in the kernel.
# NOTE(review): 'Hazardous Days' has no entry below, so it is the only
# day-count column that keeps its spaced name -- confirm whether that is
# intentional before adding it, since other cells may rely on either spelling.
column_renames = {
    'Days with AQI': 'DaysWithAQI',
    'Good Days': 'GoodDays',
    'Moderate Days': 'ModerateDays',
    'Unhealthy for Sensitive Groups Days': 'UnhealthyForSensitiveGroupsDays',
    'Unhealthy Days': 'UnhealthyDays',
    'Very Unhealthy Days': 'VeryUnhealthyDays',
    'Max AQI': 'MaxAQI',
    '90th Percentile AQI': '90thPercentileAQI',
    'Median AQI': 'MedianAQI',
    'Days CO': 'DaysCO',
    'Days NO2': 'DaysNO2',
    'Days Ozone': 'DaysOzone',
    'Days PM2.5': 'DaysPM2.5',
    'Days PM10': 'DaysPM10',
}

# Rebind the name instead of using inplace=True, so the rename is an explicit
# assignment rather than a hidden mutation of a frame already displayed above.
# Labels missing from the frame are ignored by default, so re-running this
# cell after the columns have already been renamed is harmless.
aqi_df = aqi_df.rename(columns=column_renames)

# Preview the frame with the renamed columns
aqi_df.head()

Unnamed: 0,State,County,Year,DaysWithAQI,GoodDays,ModerateDays,UnhealthyForSensitiveGroupsDays,UnhealthyDays,VeryUnhealthyDays,Hazardous Days,MaxAQI,90thPercentileAQI,MedianAQI,DaysCO,DaysNO2,DaysOzone,DaysPM2.5,DaysPM10
0,Alabama,Baldwin,2019,271,237,34,0,0,0,0,80,52,37,0,0,220,51,0
1,Alabama,Clay,2019,107,97,10,0,0,0,0,67,50,30,0,0,0,107,0
2,Alabama,Colbert,2019,263,252,11,0,0,0,0,61,47,37,0,0,228,35,0
3,Alabama,DeKalb,2019,361,324,37,0,0,0,0,90,51,39,0,0,331,30,0
4,Alabama,Elmore,2019,228,208,20,0,0,0,0,100,50,39,0,0,228,0,0


In [37]:
# Days with AQI: number of days in the year having an Air Quality Index
# value, i.e. the number of days on which measurements from any monitoring
# site in the county or MSA were reported to the AQS database.

# Summary statistics of the DaysWithAQI column
aqi_df["DaysWithAQI"].describe()

count    1019.000000
mean      319.996075
std        80.606298
min         2.000000
25%       315.500000
50%       362.000000
75%       365.000000
max       365.000000
Name: DaysWithAQI, dtype: float64

In [38]:
# First 10 rows after ordering by DaysWithAQI, largest values first
aqi_df.sort_values("DaysWithAQI", ascending=False).head(n=10)

Unnamed: 0,State,County,Year,DaysWithAQI,GoodDays,ModerateDays,UnhealthyForSensitiveGroupsDays,UnhealthyDays,VeryUnhealthyDays,Hazardous Days,MaxAQI,90thPercentileAQI,MedianAQI,DaysCO,DaysNO2,DaysOzone,DaysPM2.5,DaysPM10
509,Montana,Sanders,2019,365,330,35,0,0,0,0,94,50,25,0,0,0,346,19
334,Kentucky,Edmonson,2019,365,290,75,0,0,0,0,77,56,41,0,0,231,134,0
359,Louisiana,Jefferson,2019,365,310,55,0,0,0,0,84,54,38,0,6,268,91,0
358,Louisiana,Iberville,2019,365,306,55,4,0,0,0,119,58,37,0,1,316,48,0
357,Louisiana,East Baton Rouge,2019,365,260,103,2,0,0,0,108,68,42,0,8,178,179,0
356,Louisiana,Calcasieu,2019,365,239,126,0,0,0,0,90,63,44,0,1,136,228,0
758,Pennsylvania,Mercer,2019,365,290,75,0,0,0,0,95,57,41,0,0,259,106,0
762,Pennsylvania,Philadelphia,2019,365,244,115,6,0,0,0,140,71,44,0,30,206,129,0
766,Pennsylvania,Washington,2019,365,252,113,0,0,0,0,94,64,44,0,0,205,160,0
769,Pennsylvania,York,2019,365,299,66,0,0,0,0,93,56,41,0,22,232,111,0


In [39]:
# First 10 rows after ordering by DaysWithAQI, smallest values first
aqi_df.sort_values("DaysWithAQI", ascending=True).head(n=10)

Unnamed: 0,State,County,Year,DaysWithAQI,GoodDays,ModerateDays,UnhealthyForSensitiveGroupsDays,UnhealthyDays,VeryUnhealthyDays,Hazardous Days,MaxAQI,90thPercentileAQI,MedianAQI,DaysCO,DaysNO2,DaysOzone,DaysPM2.5,DaysPM10
775,Puerto Rico,Guayama,2019,2,2,0,0,0,0,0,29,29,29,0,0,0,0,2
788,South Carolina,Colleton,2019,7,7,0,0,0,0,0,32,32,29,0,0,5,2,0
693,Oklahoma,Grant,2019,27,27,0,0,0,0,0,32,31,16,0,0,0,27,0
768,Pennsylvania,Wyoming,2019,27,19,8,0,0,0,0,85,63,29,0,0,0,27,0
115,Colorado,Fremont,2019,32,32,0,0,0,0,0,29,23,13,0,0,0,0,32
141,Country Of Mexico,BAJA CALIFORNIA NORTE,2019,32,5,19,8,0,0,0,144,117,72,0,0,0,0,32
696,Oklahoma,Le Flore,2019,38,35,3,0,0,0,0,79,48,28,0,0,0,34,4
43,Arkansas,Craighead,2019,49,43,6,0,0,0,0,61,53,30,0,0,0,49,0
912,Virginia,Hopewell City,2019,57,57,0,0,0,0,0,19,15,8,0,0,0,0,57
894,Virgin Islands,St Croix,2019,57,51,6,0,0,0,0,71,59,20,0,0,0,57,0


In [44]:
# Keep only the rows whose DaysWithAQI is at least 365 (a value reported for
# every day of the year); the first element of .shape is the number of
# matching rows, the second is the column count.
aqi_df.loc[aqi_df["DaysWithAQI"] >= 365].shape

(370, 18)

In [45]:
# Keep only the rows whose DaysWithAQI covers at least 90% of the year; the
# first element of .shape is the number of matching rows.
# The cutoff is derived instead of hard-coded: 90% of 365 is 328.5, so the
# previous literal threshold of 328 also admitted rows at 328 days (~89.9%),
# which is below the stated 90% criterion.
DAYS_IN_YEAR = 365   # 2019 is not a leap year
MIN_COVERAGE = 0.90  # required fraction of days with a reported AQI
aqi_df[aqi_df["DaysWithAQI"] >= MIN_COVERAGE * DAYS_IN_YEAR].shape

(753, 18)

In [26]:
# First 10 rows after sorting by VeryUnhealthyDays, largest values first
aqi_df.sort_values("VeryUnhealthyDays", ascending=False).head(n=10)

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
84,California,San Bernardino,2019,365,55,177,71,54,8,0,213,169,80,0,16,246,96,7
35,Arizona,Pinal,2019,365,123,209,21,2,3,7,886,100,63,0,0,168,36,161
557,New Mexico,Dona Ana,2019,365,170,171,17,3,2,2,630,87,51,0,8,263,23,71
68,California,Los Angeles,2019,365,66,213,57,28,1,0,201,136,71,0,36,197,124,8
849,Texas,Harris,2019,365,180,158,22,4,1,0,202,93,51,0,22,155,185,3
101,California,Tulare,2019,365,127,157,78,2,1,0,280,122,66,0,2,242,99,22
65,California,Kern,2019,365,119,151,87,6,1,1,548,122,67,0,2,260,73,30
885,Utah,Uintah,2019,365,223,125,12,4,1,0,205,77,49,0,0,343,22,0
64,California,Inyo,2019,365,232,121,6,2,1,3,638,84,47,0,0,319,14,32
21,Alaska,Fairbanks North Star,2019,365,230,96,18,16,1,4,377,103,38,0,13,131,215,6


In [46]:
# First 10 rows after sorting by UnhealthyDays, largest values first
aqi_df.sort_values("UnhealthyDays", ascending=False).head(n=10)

Unnamed: 0,State,County,Year,DaysWithAQI,GoodDays,ModerateDays,UnhealthyForSensitiveGroupsDays,UnhealthyDays,VeryUnhealthyDays,Hazardous Days,MaxAQI,90thPercentileAQI,MedianAQI,DaysCO,DaysNO2,DaysOzone,DaysPM2.5,DaysPM10
84,California,San Bernardino,2019,365,55,177,71,54,8,0,213,169,80,0,16,246,96,7
81,California,Riverside,2019,365,75,170,91,29,0,0,185,147,77,0,0,247,70,48
68,California,Los Angeles,2019,365,66,213,57,28,1,0,201,136,71,0,36,197,124,8
21,Alaska,Fairbanks North Star,2019,365,230,96,18,16,1,4,377,103,38,0,13,131,215,6
65,California,Kern,2019,365,119,151,87,6,1,1,548,122,67,0,2,260,73,30
71,California,Mariposa,2019,365,221,130,9,5,0,0,176,84,47,0,0,296,68,1
849,Texas,Harris,2019,365,180,158,22,4,1,0,202,93,51,0,22,155,185,3
18,Alaska,Anchorage,2019,365,289,67,5,4,0,0,160,63,27,8,0,0,252,105
879,Utah,Duchesne,2019,365,234,120,7,4,0,0,177,71,48,0,0,332,33,0
885,Utah,Uintah,2019,365,223,125,12,4,1,0,205,77,49,0,0,343,22,0
