# Exploring the 2019 Air Quality Data

In [1]:
# Interpreting the data
# https://www.epa.gov/outdoor-air-quality-data/about-air-data-reports

In [2]:
# Import dependencies
import pandas as pd
from functools import reduce


In [3]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2019.csv"
aqi2019_df = pd.read_csv(file_path)
aqi2019_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2019,271,237,34,0,0,0,0,80,52,37,0,0,220,51,0
1,Alabama,Clay,2019,107,97,10,0,0,0,0,67,50,30,0,0,0,107,0
2,Alabama,Colbert,2019,263,252,11,0,0,0,0,61,47,37,0,0,228,35,0
3,Alabama,DeKalb,2019,361,324,37,0,0,0,0,90,51,39,0,0,331,30,0
4,Alabama,Elmore,2019,228,208,20,0,0,0,0,100,50,39,0,0,228,0,0


In [4]:
# View all column names
aqi2019_df.columns.tolist()

['State',
 'County',
 'Year',
 'Days with AQI',
 'Good Days',
 'Moderate Days',
 'Unhealthy for Sensitive Groups Days',
 'Unhealthy Days',
 'Very Unhealthy Days',
 'Hazardous Days',
 'Max AQI',
 '90th Percentile AQI',
 'Median AQI',
 'Days CO',
 'Days NO2',
 'Days Ozone',
 'Days PM2.5',
 'Days PM10']

In [5]:
dict = {'Days with AQI': 'Days_With_AQI',
        'Good Days': 'Good_Days',
        'Moderate Days': 'Moderate_Days',
        'Unhealthy for Sensitive Groups Days': 'Unhealthy_For_Sensitive_Groups_Days',
        'Unhealthy Days': 'Unhealthy_Days',
        'Very Unhealthy Days': 'Very_Unhealthy_Days',
        'Max AQI': 'Max_AQI',
        '90th Percentile AQI': '90th_Percentile_AQI',
        'Median AQI': 'Median_AQI',
        'Days CO': 'Days_CO',
        'Days NO2': 'Days_NO2',
        'Days Ozone': 'Days_Ozone',
        'Days PM2.5': 'Days_PM2.5',
        'Days PM10': 'Days_PM10'
       }
 
# call rename () method
aqi2019_df.rename(columns=dict,
          inplace=True)
 
# print Data frame after rename columns
aqi2019_df.head()

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
0,Alabama,Baldwin,2019,271,237,34,0,0,0,0,80,52,37,0,0,220,51,0
1,Alabama,Clay,2019,107,97,10,0,0,0,0,67,50,30,0,0,0,107,0
2,Alabama,Colbert,2019,263,252,11,0,0,0,0,61,47,37,0,0,228,35,0
3,Alabama,DeKalb,2019,361,324,37,0,0,0,0,90,51,39,0,0,331,30,0
4,Alabama,Elmore,2019,228,208,20,0,0,0,0,100,50,39,0,0,228,0,0


In [6]:
# Days with AQI Info
'''Number of days in the year having an Air Quality Index value. 
This is the number of days on which measurements from any monitoring site in the county 
or MSA were reported to the AQS database.'''

# Summary statistics
aqi2019_df["Days_With_AQI"].describe()

count    1019.000000
mean      319.996075
std        80.606298
min         2.000000
25%       315.500000
50%       362.000000
75%       365.000000
max       365.000000
Name: Days_With_AQI, dtype: float64

In [7]:
# Sort by Days with AQI
aqi2019_df.sort_values(by=['Days_With_AQI'], ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
509,Montana,Sanders,2019,365,330,35,0,0,0,0,94,50,25,0,0,0,346,19
334,Kentucky,Edmonson,2019,365,290,75,0,0,0,0,77,56,41,0,0,231,134,0
359,Louisiana,Jefferson,2019,365,310,55,0,0,0,0,84,54,38,0,6,268,91,0
358,Louisiana,Iberville,2019,365,306,55,4,0,0,0,119,58,37,0,1,316,48,0
357,Louisiana,East Baton Rouge,2019,365,260,103,2,0,0,0,108,68,42,0,8,178,179,0
356,Louisiana,Calcasieu,2019,365,239,126,0,0,0,0,90,63,44,0,1,136,228,0
758,Pennsylvania,Mercer,2019,365,290,75,0,0,0,0,95,57,41,0,0,259,106,0
762,Pennsylvania,Philadelphia,2019,365,244,115,6,0,0,0,140,71,44,0,30,206,129,0
766,Pennsylvania,Washington,2019,365,252,113,0,0,0,0,94,64,44,0,0,205,160,0
769,Pennsylvania,York,2019,365,299,66,0,0,0,0,93,56,41,0,22,232,111,0


In [8]:
# Sort by Days with AQI
aqi2019_df.sort_values(by=['Days_With_AQI']).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
775,Puerto Rico,Guayama,2019,2,2,0,0,0,0,0,29,29,29,0,0,0,0,2
788,South Carolina,Colleton,2019,7,7,0,0,0,0,0,32,32,29,0,0,5,2,0
693,Oklahoma,Grant,2019,27,27,0,0,0,0,0,32,31,16,0,0,0,27,0
768,Pennsylvania,Wyoming,2019,27,19,8,0,0,0,0,85,63,29,0,0,0,27,0
115,Colorado,Fremont,2019,32,32,0,0,0,0,0,29,23,13,0,0,0,0,32
141,Country Of Mexico,BAJA CALIFORNIA NORTE,2019,32,5,19,8,0,0,0,144,117,72,0,0,0,0,32
696,Oklahoma,Le Flore,2019,38,35,3,0,0,0,0,79,48,28,0,0,0,34,4
43,Arkansas,Craighead,2019,49,43,6,0,0,0,0,61,53,30,0,0,0,49,0
912,Virginia,Hopewell City,2019,57,57,0,0,0,0,0,19,15,8,0,0,0,0,57
894,Virgin Islands,St Croix,2019,57,51,6,0,0,0,0,71,59,20,0,0,0,57,0


In [9]:
# Sort by Days with AQI and determine the number of counties with 
# a measurement for every day of the year
aqi2019_df[aqi2019_df.Days_With_AQI >= 365].shape

(370, 18)

In [10]:
# Sort by Days with AQI and determine the number of counties with 
# a measurement for 90% of days in the year
aqi2019_df[aqi2019_df.Days_With_AQI >= 328].shape

(753, 18)

In [11]:
# Sort by Very Unhealthy Days
aqi2019_df.sort_values(by=['Very_Unhealthy_Days'],  ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
84,California,San Bernardino,2019,365,55,177,71,54,8,0,213,169,80,0,16,246,96,7
35,Arizona,Pinal,2019,365,123,209,21,2,3,7,886,100,63,0,0,168,36,161
557,New Mexico,Dona Ana,2019,365,170,171,17,3,2,2,630,87,51,0,8,263,23,71
68,California,Los Angeles,2019,365,66,213,57,28,1,0,201,136,71,0,36,197,124,8
849,Texas,Harris,2019,365,180,158,22,4,1,0,202,93,51,0,22,155,185,3
101,California,Tulare,2019,365,127,157,78,2,1,0,280,122,66,0,2,242,99,22
65,California,Kern,2019,365,119,151,87,6,1,1,548,122,67,0,2,260,73,30
885,Utah,Uintah,2019,365,223,125,12,4,1,0,205,77,49,0,0,343,22,0
64,California,Inyo,2019,365,232,121,6,2,1,3,638,84,47,0,0,319,14,32
21,Alaska,Fairbanks North Star,2019,365,230,96,18,16,1,4,377,103,38,0,13,131,215,6


In [12]:
# Sort by Unhealthy Days
aqi2019_df.sort_values(by=['Unhealthy_Days'],  ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
84,California,San Bernardino,2019,365,55,177,71,54,8,0,213,169,80,0,16,246,96,7
81,California,Riverside,2019,365,75,170,91,29,0,0,185,147,77,0,0,247,70,48
68,California,Los Angeles,2019,365,66,213,57,28,1,0,201,136,71,0,36,197,124,8
21,Alaska,Fairbanks North Star,2019,365,230,96,18,16,1,4,377,103,38,0,13,131,215,6
65,California,Kern,2019,365,119,151,87,6,1,1,548,122,67,0,2,260,73,30
71,California,Mariposa,2019,365,221,130,9,5,0,0,176,84,47,0,0,296,68,1
849,Texas,Harris,2019,365,180,158,22,4,1,0,202,93,51,0,22,155,185,3
18,Alaska,Anchorage,2019,365,289,67,5,4,0,0,160,63,27,8,0,0,252,105
879,Utah,Duchesne,2019,365,234,120,7,4,0,0,177,71,48,0,0,332,33,0
885,Utah,Uintah,2019,365,223,125,12,4,1,0,205,77,49,0,0,343,22,0


In [13]:
# Sort by Moderate Days
aqi2019_df.sort_values(by=['Moderate_Days'],  ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
31,Arizona,Maricopa,2019,365,66,261,38,0,0,0,147,101,65,0,3,215,71,76
85,California,San Diego,2019,365,87,253,23,2,0,0,169,93,64,0,2,176,184,3
63,California,Imperial,2019,365,125,219,19,2,0,0,185,93,58,0,1,210,79,75
68,California,Los Angeles,2019,365,66,213,57,28,1,0,201,136,71,0,36,197,124,8
35,Arizona,Pinal,2019,365,123,209,21,2,3,7,886,100,63,0,0,168,36,161
731,Pennsylvania,Allegheny,2019,365,146,208,7,4,0,0,161,79,54,0,0,103,262,0
283,Indiana,Marion,2019,365,153,208,4,0,0,0,112,72,53,0,0,110,255,0
242,Illinois,Cook,2019,365,150,206,9,0,0,0,133,76,53,0,26,91,238,10
524,Nevada,Clark,2019,365,154,206,5,0,0,0,122,87,54,1,20,252,84,8
845,Texas,El Paso,2019,365,147,205,11,2,0,0,157,84,53,0,21,204,137,3


In [14]:
# Sort by Days of Ozone
aqi2019_df.sort_values(by=['Days_Ozone'],  ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
297,Indiana,Warrick,2019,365,337,28,0,0,0,0,93,50,36,0,0,365,0,0
289,Indiana,Shelby,2019,365,338,27,0,0,0,0,84,49,36,0,0,365,0,0
167,Florida,Manatee,2019,365,333,30,2,0,0,0,108,49,36,0,0,365,0,0
265,Indiana,Brown,2019,365,349,16,0,0,0,0,74,47,35,0,0,365,0,0
486,Missouri,Monroe,2019,365,357,8,0,0,0,0,71,45,35,0,0,365,0,0
840,Texas,Collin,2019,365,309,48,7,1,0,0,156,58,40,0,0,365,0,0
124,Colorado,Montezuma,2019,365,279,86,0,0,0,0,97,61,46,0,0,365,0,0
176,Florida,Pasco,2019,365,336,29,0,0,0,0,97,49,36,0,0,365,0,0
174,Florida,Osceola,2019,364,338,22,4,0,0,0,122,48,34,0,0,364,0,0
369,Louisiana,St. John the Baptist,2019,364,334,30,0,0,0,0,93,48,33,0,0,364,0,0


In [15]:
# Sort by Days of PM2.5
aqi2019_df.sort_values(by=['Days_PM2.5'],  ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
722,Oregon,Linn,2019,365,280,83,2,0,0,0,104,62,28,0,0,0,365,0
983,Wisconsin,Grant,2019,365,304,60,1,0,0,0,104,57,29,0,0,0,365,0
932,Washington,Chelan,2019,365,298,66,1,0,0,0,102,56,24,0,0,0,365,0
742,Pennsylvania,Cumberland,2019,365,268,92,5,0,0,0,150,64,38,0,0,0,365,0
935,Washington,Columbia,2019,365,340,25,0,0,0,0,68,42,17,0,0,0,365,0
939,Washington,Grant,2019,365,339,26,0,0,0,0,67,45,18,0,0,0,365,0
941,Washington,Jefferson,2019,365,364,1,0,0,0,0,51,32,19,0,0,0,365,0
944,Washington,Kittitas,2019,365,305,60,0,0,0,0,96,56,23,0,0,0,365,0
945,Washington,Klickitat,2019,365,339,26,0,0,0,0,96,45,20,0,0,0,365,0
946,Washington,Lewis,2019,365,343,22,0,0,0,0,62,45,20,0,0,0,365,0


In [16]:
# Sort by Median AQI
aqi2019_df.sort_values(by=['Median_AQI'],  ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
84,California,San Bernardino,2019,365,55,177,71,54,8,0,213,169,80,0,16,246,96,7
81,California,Riverside,2019,365,75,170,91,29,0,0,185,147,77,0,0,247,70,48
141,Country Of Mexico,BAJA CALIFORNIA NORTE,2019,32,5,19,8,0,0,0,144,117,72,0,0,0,0,32
68,California,Los Angeles,2019,365,66,213,57,28,1,0,201,136,71,0,36,197,124,8
65,California,Kern,2019,365,119,151,87,6,1,1,548,122,67,0,2,260,73,30
101,California,Tulare,2019,365,127,157,78,2,1,0,280,122,66,0,2,242,99,22
31,Arizona,Maricopa,2019,365,66,261,38,0,0,0,147,101,65,0,3,215,71,76
85,California,San Diego,2019,365,87,253,23,2,0,0,169,93,64,0,2,176,184,3
35,Arizona,Pinal,2019,365,123,209,21,2,3,7,886,100,63,0,0,168,36,161
60,California,Fresno,2019,365,139,160,64,2,0,0,187,115,60,0,3,259,80,23


In [17]:
# Sort by Max AQI
aqi2019_df.sort_values(by=['Max_AQI'],  ascending=False).head(10)

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
74,California,Mono,2019,365,270,80,6,0,0,9,3852,81,27,0,0,0,305,60
35,Arizona,Pinal,2019,365,123,209,21,2,3,7,886,100,63,0,0,168,36,161
64,California,Inyo,2019,365,232,121,6,2,1,3,638,84,47,0,0,319,14,32
557,New Mexico,Dona Ana,2019,365,170,171,17,3,2,2,630,87,51,0,8,263,23,71
562,New Mexico,Luna,2019,361,349,10,1,0,0,1,617,29,12,0,0,0,0,361
65,California,Kern,2019,365,119,151,87,6,1,1,548,122,67,0,2,260,73,30
66,California,Kings,2019,365,152,172,37,2,0,2,507,105,56,0,0,190,103,72
21,Alaska,Fairbanks North Star,2019,365,230,96,18,16,1,4,377,103,38,0,13,131,215,6
101,California,Tulare,2019,365,127,157,78,2,1,0,280,122,66,0,2,242,99,22
84,California,San Bernardino,2019,365,55,177,71,54,8,0,213,169,80,0,16,246,96,7


In [18]:
# Look up the top 10 counties with highest levels of COPD
aqi2019_df.loc[aqi2019_df['County'].isin(
    ['Lee',
     'Clay',
     'Kusilvak',
     'Bell',
     'McDowell',
     'Wolfe',
     'Harlan',
     'Leslie',
     'McCreary',
     'Knott'])]

Unnamed: 0,State,County,Year,Days_With_AQI,Good_Days,Moderate_Days,Unhealthy_For_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous Days,Max_AQI,90th_Percentile_AQI,Median_AQI,Days_CO,Days_NO2,Days_Ozone,Days_PM2.5,Days_PM10
1,Alabama,Clay,2019,107,97,10,0,0,0,0,67,50,30,0,0,0,107,0
164,Florida,Lee,2019,365,328,36,1,0,0,0,108,51,36,0,0,251,110,4
305,Iowa,Lee,2019,120,101,19,0,0,0,0,70,58,29,0,0,0,120,0
326,Kentucky,Bell,2019,263,243,20,0,0,0,0,77,49,39,0,0,234,29,0
471,Mississippi,Lee,2019,241,222,19,0,0,0,0,84,49,39,0,0,241,0,0
479,Missouri,Clay,2019,365,295,70,0,0,0,0,90,59,40,0,0,225,140,0
834,Texas,Bell,2019,365,284,79,2,0,0,0,105,58,42,0,1,265,99,0


# Loading in Data from 2009-2019

In [19]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2009.csv"
aqi2009_df = pd.read_csv(file_path)
aqi2009_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2009,252,218,32,2,0,0,0,136,53,36,0,0,200,52,0
1,Alabama,Clay,2009,119,97,22,0,0,0,0,94,59,33,0,0,0,119,0
2,Alabama,Colbert,2009,323,220,103,0,0,0,0,76,60,43,0,0,132,191,0
3,Alabama,DeKalb,2009,363,311,52,0,0,0,0,100,54,36,0,0,308,55,0
4,Alabama,Elmore,2009,244,228,16,0,0,0,0,80,49,36,0,0,244,0,0


In [20]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2010.csv"
aqi2010_df = pd.read_csv(file_path)
aqi2010_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2010,274,188,80,6,0,0,0,150,71,43,0,0,192,82,0
1,Alabama,Clay,2010,113,79,34,0,0,0,0,86,60,42,0,0,0,113,0
2,Alabama,Colbert,2010,355,208,146,1,0,0,0,105,67,47,0,0,114,241,0
3,Alabama,DeKalb,2010,358,248,106,4,0,0,0,101,74,43,0,0,303,55,0
4,Alabama,Elmore,2010,241,181,55,5,0,0,0,112,71,43,0,0,241,0,0


In [21]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2011.csv"
aqi2011_df = pd.read_csv(file_path)
aqi2011_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2011,282,204,69,9,0,0,0,126,74,42,0,0,209,73,0
1,Alabama,Clay,2011,118,80,38,0,0,0,0,86,63,39,0,0,0,118,0
2,Alabama,Colbert,2011,279,214,64,1,0,0,0,101,61,42,0,0,197,82,0
3,Alabama,DeKalb,2011,336,257,78,1,0,0,0,101,63,40,0,0,271,65,0
4,Alabama,Elmore,2011,242,192,48,2,0,0,0,105,64,44,0,0,242,0,0


In [22]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2012.csv"
aqi2012_df = pd.read_csv(file_path)
aqi2012_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2012,284,226,56,2,0,0,0,112,61,38,0,0,210,74,0
1,Alabama,Clay,2012,121,99,22,0,0,0,0,72,56,35,0,0,0,121,0
2,Alabama,Colbert,2012,283,222,55,6,0,0,0,136,62,40,0,0,209,74,0
3,Alabama,DeKalb,2012,361,282,74,5,0,0,0,115,64,40,0,0,320,41,0
4,Alabama,Elmore,2012,245,212,33,0,0,0,0,100,54,40,0,0,245,0,0


In [23]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2013.csv"
aqi2013_df = pd.read_csv(file_path)
aqi2013_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2013,273,235,38,0,0,0,0,87,54,36,0,0,200,73,0
1,Alabama,Clay,2013,118,100,18,0,0,0,0,65,52,32,0,0,0,118,0
2,Alabama,Colbert,2013,285,252,33,0,0,0,0,80,51,38,0,0,202,83,0
3,Alabama,DeKalb,2013,360,319,41,0,0,0,0,93,52,38,0,0,305,55,0
4,Alabama,Elmore,2013,244,229,15,0,0,0,0,87,48,36,0,0,244,0,0


In [24]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2014.csv"
aqi2014_df = pd.read_csv(file_path)
aqi2014_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2014,280,232,46,2,0,0,0,112,59,38,0,0,205,75,0
1,Alabama,Clay,2014,114,96,18,0,0,0,0,70,54,36,0,0,0,114,0
2,Alabama,Colbert,2014,284,251,33,0,0,0,0,87,52,37,0,0,206,78,0
3,Alabama,DeKalb,2014,360,313,47,0,0,0,0,90,54,38,0,0,300,60,0
4,Alabama,Elmore,2014,245,224,21,0,0,0,0,84,49,36,0,0,245,0,0


In [25]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2015.csv"
aqi2015_df = pd.read_csv(file_path)
aqi2015_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2015,264,230,33,1,0,0,0,129,53,38,0,0,189,75,0
1,Alabama,Clay,2015,112,101,11,0,0,0,0,91,50,32,0,0,0,112,0
2,Alabama,Colbert,2015,280,251,29,0,0,0,0,73,51,36,0,0,195,85,0
3,Alabama,DeKalb,2015,363,319,43,1,0,0,0,101,52,37,0,0,307,56,0
4,Alabama,Elmore,2015,233,223,9,1,0,0,0,115,47,35,0,0,233,0,0


In [26]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2016.csv"
aqi2016_df = pd.read_csv(file_path)
aqi2016_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2016,279,247,32,0,0,0,0,87,51,37,0,0,221,58,0
1,Alabama,Clay,2016,116,109,7,0,0,0,0,56,45,30,0,0,0,116,0
2,Alabama,Colbert,2016,282,258,23,1,0,0,0,115,50,38,0,0,219,63,0
3,Alabama,DeKalb,2016,348,304,43,1,0,0,0,119,54,40,0,0,321,27,0
4,Alabama,Elmore,2016,117,107,10,0,0,0,0,77,48,40,0,0,117,0,0


In [27]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2017.csv"
aqi2017_df = pd.read_csv(file_path)
aqi2017_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2017,270,241,28,1,0,0,0,108,51,36,0,0,206,64,0
1,Alabama,Clay,2017,118,104,14,0,0,0,0,66,52,30,0,0,0,118,0
2,Alabama,Colbert,2017,283,265,18,0,0,0,0,63,48,37,0,0,218,65,0
3,Alabama,DeKalb,2017,359,329,30,0,0,0,0,80,50,39,0,0,315,44,0
4,Alabama,Elmore,2017,226,221,5,0,0,0,0,58,45,35,0,0,226,0,0


In [28]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2018.csv"
aqi2018_df = pd.read_csv(file_path)
aqi2018_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2018,270,245,25,0,0,0,0,97,50,35,0,0,214,56,0
1,Alabama,Clay,2018,110,103,7,0,0,0,0,64,45,27,0,0,0,110,0
2,Alabama,Colbert,2018,277,251,26,0,0,0,0,93,50,35,0,0,209,68,0
3,Alabama,DeKalb,2018,350,316,34,0,0,0,0,84,50,35,0,0,317,33,0
4,Alabama,Elmore,2018,222,203,19,0,0,0,0,71,49,33,0,0,222,0,0


In [29]:
# Create the dataframe
file_path = "./Resources/annual_aqi_by_county_2019.csv"
aqi2019_df = pd.read_csv(file_path)
aqi2019_df.head()

Unnamed: 0,State,County,Year,Days with AQI,Good Days,Moderate Days,Unhealthy for Sensitive Groups Days,Unhealthy Days,Very Unhealthy Days,Hazardous Days,Max AQI,90th Percentile AQI,Median AQI,Days CO,Days NO2,Days Ozone,Days PM2.5,Days PM10
0,Alabama,Baldwin,2019,271,237,34,0,0,0,0,80,52,37,0,0,220,51,0
1,Alabama,Clay,2019,107,97,10,0,0,0,0,67,50,30,0,0,0,107,0
2,Alabama,Colbert,2019,263,252,11,0,0,0,0,61,47,37,0,0,228,35,0
3,Alabama,DeKalb,2019,361,324,37,0,0,0,0,90,51,39,0,0,331,30,0
4,Alabama,Elmore,2019,228,208,20,0,0,0,0,100,50,39,0,0,228,0,0


# Processing the Dataframes

### Filter for Columns of Interest and Match SQL Schema

In [30]:
# List of columns of interest
columns = ["State", "County", "Year", "Days with AQI", "Good Days", 
                      "Moderate Days", "Unhealthy for Sensitive Groups Days",
                     "Unhealthy Days", "Very Unhealthy Days", "Hazardous Days"]

In [31]:
aqi2009_df = aqi2009_df.filter(columns, axis=1)
aqi2010_df = aqi2010_df.filter(columns, axis=1)
aqi2011_df = aqi2011_df.filter(columns, axis=1)
aqi2012_df = aqi2012_df.filter(columns, axis=1)
aqi2013_df = aqi2013_df.filter(columns, axis=1)
aqi2014_df = aqi2014_df.filter(columns, axis=1)
aqi2015_df = aqi2015_df.filter(columns, axis=1)
aqi2016_df = aqi2016_df.filter(columns, axis=1)
aqi2017_df = aqi2017_df.filter(columns, axis=1)
aqi2018_df = aqi2018_df.filter(columns, axis=1)
aqi2019_df = aqi2019_df.filter(columns, axis=1)

In [32]:
# List of columns to rename
columns = ['State', 'County', 'Year', 'Days_with_AQI', 'Good_Days',
           'Moderate_Days', 'Unhealthy_for_Sensitive_Groups_Days',
           'Unhealthy_Days', 'Very_Unhealthy_Days', 'Hazardous_Days']

In [33]:
aqi2009_df = aqi2009_df.set_axis(columns, axis=1, inplace=False)
aqi2010_df = aqi2010_df.set_axis(columns, axis=1, inplace=False)
aqi2011_df = aqi2011_df.set_axis(columns, axis=1, inplace=False)
aqi2012_df = aqi2012_df.set_axis(columns, axis=1, inplace=False)
aqi2013_df = aqi2013_df.set_axis(columns, axis=1, inplace=False)
aqi2014_df = aqi2014_df.set_axis(columns, axis=1, inplace=False)
aqi2015_df = aqi2015_df.set_axis(columns, axis=1, inplace=False)
aqi2016_df = aqi2016_df.set_axis(columns, axis=1, inplace=False)
aqi2017_df = aqi2017_df.set_axis(columns, axis=1, inplace=False)
aqi2018_df = aqi2018_df.set_axis(columns, axis=1, inplace=False)
aqi2019_df = aqi2019_df.set_axis(columns, axis=1, inplace=False)

In [34]:
# Create a list of all dataframes
dataframes = [aqi2009_df, aqi2010_df, aqi2011_df, 
              aqi2012_df, aqi2013_df, aqi2014_df, 
              aqi2015_df, aqi2016_df, aqi2017_df, 
              aqi2018_df, aqi2019_df]

# Check the shape of all dataframes
for data in dataframes:
    print(data.shape)

(1077, 10)
(1076, 10)
(1071, 10)
(1050, 10)
(1044, 10)
(1036, 10)
(1042, 10)
(1030, 10)
(1029, 10)
(1021, 10)
(1019, 10)


In [35]:
# Check for null values
for data in dataframes:
    print(data.isnull().sum())

State                                  0
County                                 0
Year                                   0
Days_with_AQI                          0
Good_Days                              0
Moderate_Days                          0
Unhealthy_for_Sensitive_Groups_Days    0
Unhealthy_Days                         0
Very_Unhealthy_Days                    0
Hazardous_Days                         0
dtype: int64
State                                  0
County                                 0
Year                                   0
Days_with_AQI                          0
Good_Days                              0
Moderate_Days                          0
Unhealthy_for_Sensitive_Groups_Days    0
Unhealthy_Days                         0
Very_Unhealthy_Days                    0
Hazardous_Days                         0
dtype: int64
State                                  0
County                                 0
Year                                   0
Days_with_AQI                  

In [36]:
# NOTE: From the shape of the dataframe we can see that the Air Quality from different years have
# NOTE: different numbers of rows. In the absence of sensors, data is usually marked 0. 
# NOTE: In addition, there are no missing values in these dataframes. 
# NOTE: Therefore, I think that these differences could be due to redistricting of counties.
# NOTE: In any case, we care about the counties from the year of 2019 for our analysis.

### Make Decade DataFrame Containing 10years of Air Quality Data
- Approach:
    - Merged all dataframes into one dataframe
    - Adjusted column names to match their corresponding year
    - Summed the values for each set of columns into one column 
    - Dropped the old columns

In [37]:
# Create a list of all dataframes to be merged
# NOTE: I have placed 2019 first because this is the dataframe that I want the others matched to
# NOTE: Since the dataframes have different number of counties (possibly due to redistricting) this should
# NOTE: ensure that only the counties we are interested in (counties from year 2019) will be included.
dataframes = [aqi2019_df, aqi2009_df, aqi2010_df, aqi2011_df, 
              aqi2012_df, aqi2013_df, aqi2014_df, 
              aqi2015_df, aqi2016_df, aqi2017_df, 
              aqi2018_df]

# Merge all dataframes 
decade_df = reduce(lambda  left,right: pd.merge(left,right,on=["State", "County"],
                                                how='left'), dataframes)

# Show entire dataframe
pd.set_option('display.max_columns', None)

decade_df

  if sys.path[0] == '':


Unnamed: 0,State,County,Year_x,Days_with_AQI_x,Good_Days_x,Moderate_Days_x,Unhealthy_for_Sensitive_Groups_Days_x,Unhealthy_Days_x,Very_Unhealthy_Days_x,Hazardous_Days_x,Year_y,Days_with_AQI_y,Good_Days_y,Moderate_Days_y,Unhealthy_for_Sensitive_Groups_Days_y,Unhealthy_Days_y,Very_Unhealthy_Days_y,Hazardous_Days_y,Year_x.1,Days_with_AQI_x.1,Good_Days_x.1,Moderate_Days_x.1,Unhealthy_for_Sensitive_Groups_Days_x.1,Unhealthy_Days_x.1,Very_Unhealthy_Days_x.1,Hazardous_Days_x.1,Year_y.1,Days_with_AQI_y.1,Good_Days_y.1,Moderate_Days_y.1,Unhealthy_for_Sensitive_Groups_Days_y.1,Unhealthy_Days_y.1,Very_Unhealthy_Days_y.1,Hazardous_Days_y.1,Year_x.2,Days_with_AQI_x.2,Good_Days_x.2,Moderate_Days_x.2,Unhealthy_for_Sensitive_Groups_Days_x.2,Unhealthy_Days_x.2,Very_Unhealthy_Days_x.2,Hazardous_Days_x.2,Year_y.2,Days_with_AQI_y.2,Good_Days_y.2,Moderate_Days_y.2,Unhealthy_for_Sensitive_Groups_Days_y.2,Unhealthy_Days_y.2,Very_Unhealthy_Days_y.2,Hazardous_Days_y.2,Year_x.3,Days_with_AQI_x.3,Good_Days_x.3,Moderate_Days_x.3,Unhealthy_for_Sensitive_Groups_Days_x.3,Unhealthy_Days_x.3,Very_Unhealthy_Days_x.3,Hazardous_Days_x.3,Year_y.3,Days_with_AQI_y.3,Good_Days_y.3,Moderate_Days_y.3,Unhealthy_for_Sensitive_Groups_Days_y.3,Unhealthy_Days_y.3,Very_Unhealthy_Days_y.3,Hazardous_Days_y.3,Year_x.4,Days_with_AQI_x.4,Good_Days_x.4,Moderate_Days_x.4,Unhealthy_for_Sensitive_Groups_Days_x.4,Unhealthy_Days_x.4,Very_Unhealthy_Days_x.4,Hazardous_Days_x.4,Year_y.4,Days_with_AQI_y.4,Good_Days_y.4,Moderate_Days_y.4,Unhealthy_for_Sensitive_Groups_Days_y.4,Unhealthy_Days_y.4,Very_Unhealthy_Days_y.4,Hazardous_Days_y.4,Year,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous_Days
0,Alabama,Baldwin,2019,271,237,34,0,0,0,0,2009.0,252.0,218.0,32.0,2.0,0.0,0.0,0.0,2010.0,274.0,188.0,80.0,6.0,0.0,0.0,0.0,2011.0,282.0,204.0,69.0,9.0,0.0,0.0,0.0,2012.0,284.0,226.0,56.0,2.0,0.0,0.0,0.0,2013.0,273.0,235.0,38.0,0.0,0.0,0.0,0.0,2014.0,280.0,232.0,46.0,2.0,0.0,0.0,0.0,2015.0,264.0,230.0,33.0,1.0,0.0,0.0,0.0,2016.0,279.0,247.0,32.0,0.0,0.0,0.0,0.0,2017.0,270.0,241.0,28.0,1.0,0.0,0.0,0.0,2018.0,270.0,245.0,25.0,0.0,0.0,0.0,0.0
1,Alabama,Clay,2019,107,97,10,0,0,0,0,2009.0,119.0,97.0,22.0,0.0,0.0,0.0,0.0,2010.0,113.0,79.0,34.0,0.0,0.0,0.0,0.0,2011.0,118.0,80.0,38.0,0.0,0.0,0.0,0.0,2012.0,121.0,99.0,22.0,0.0,0.0,0.0,0.0,2013.0,118.0,100.0,18.0,0.0,0.0,0.0,0.0,2014.0,114.0,96.0,18.0,0.0,0.0,0.0,0.0,2015.0,112.0,101.0,11.0,0.0,0.0,0.0,0.0,2016.0,116.0,109.0,7.0,0.0,0.0,0.0,0.0,2017.0,118.0,104.0,14.0,0.0,0.0,0.0,0.0,2018.0,110.0,103.0,7.0,0.0,0.0,0.0,0.0
2,Alabama,Colbert,2019,263,252,11,0,0,0,0,2009.0,323.0,220.0,103.0,0.0,0.0,0.0,0.0,2010.0,355.0,208.0,146.0,1.0,0.0,0.0,0.0,2011.0,279.0,214.0,64.0,1.0,0.0,0.0,0.0,2012.0,283.0,222.0,55.0,6.0,0.0,0.0,0.0,2013.0,285.0,252.0,33.0,0.0,0.0,0.0,0.0,2014.0,284.0,251.0,33.0,0.0,0.0,0.0,0.0,2015.0,280.0,251.0,29.0,0.0,0.0,0.0,0.0,2016.0,282.0,258.0,23.0,1.0,0.0,0.0,0.0,2017.0,283.0,265.0,18.0,0.0,0.0,0.0,0.0,2018.0,277.0,251.0,26.0,0.0,0.0,0.0,0.0
3,Alabama,DeKalb,2019,361,324,37,0,0,0,0,2009.0,363.0,311.0,52.0,0.0,0.0,0.0,0.0,2010.0,358.0,248.0,106.0,4.0,0.0,0.0,0.0,2011.0,336.0,257.0,78.0,1.0,0.0,0.0,0.0,2012.0,361.0,282.0,74.0,5.0,0.0,0.0,0.0,2013.0,360.0,319.0,41.0,0.0,0.0,0.0,0.0,2014.0,360.0,313.0,47.0,0.0,0.0,0.0,0.0,2015.0,363.0,319.0,43.0,1.0,0.0,0.0,0.0,2016.0,348.0,304.0,43.0,1.0,0.0,0.0,0.0,2017.0,359.0,329.0,30.0,0.0,0.0,0.0,0.0,2018.0,350.0,316.0,34.0,0.0,0.0,0.0,0.0
4,Alabama,Elmore,2019,228,208,20,0,0,0,0,2009.0,244.0,228.0,16.0,0.0,0.0,0.0,0.0,2010.0,241.0,181.0,55.0,5.0,0.0,0.0,0.0,2011.0,242.0,192.0,48.0,2.0,0.0,0.0,0.0,2012.0,245.0,212.0,33.0,0.0,0.0,0.0,0.0,2013.0,244.0,229.0,15.0,0.0,0.0,0.0,0.0,2014.0,245.0,224.0,21.0,0.0,0.0,0.0,0.0,2015.0,233.0,223.0,9.0,1.0,0.0,0.0,0.0,2016.0,117.0,107.0,10.0,0.0,0.0,0.0,0.0,2017.0,226.0,221.0,5.0,0.0,0.0,0.0,0.0,2018.0,222.0,203.0,19.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014,Wyoming,Sublette,2019,365,266,88,8,3,0,0,2009.0,365.0,277.0,88.0,0.0,0.0,0.0,0.0,2010.0,365.0,250.0,113.0,2.0,0.0,0.0,0.0,2011.0,365.0,214.0,134.0,12.0,2.0,3.0,0.0,2012.0,366.0,206.0,145.0,9.0,6.0,0.0,0.0,2013.0,365.0,282.0,83.0,0.0,0.0,0.0,0.0,2014.0,365.0,286.0,79.0,0.0,0.0,0.0,0.0,2015.0,365.0,297.0,67.0,1.0,0.0,0.0,0.0,2016.0,366.0,297.0,68.0,1.0,0.0,0.0,0.0,2017.0,365.0,228.0,129.0,8.0,0.0,0.0,0.0,2018.0,365.0,233.0,132.0,0.0,0.0,0.0,0.0
1015,Wyoming,Sweetwater,2019,365,262,100,3,0,0,0,2009.0,365.0,280.0,83.0,1.0,1.0,0.0,0.0,2010.0,365.0,255.0,107.0,3.0,0.0,0.0,0.0,2011.0,365.0,231.0,129.0,5.0,0.0,0.0,0.0,2012.0,366.0,216.0,142.0,7.0,0.0,1.0,0.0,2013.0,365.0,253.0,109.0,2.0,0.0,0.0,1.0,2014.0,365.0,247.0,110.0,6.0,2.0,0.0,0.0,2015.0,365.0,256.0,105.0,4.0,0.0,0.0,0.0,2016.0,366.0,304.0,62.0,0.0,0.0,0.0,0.0,2017.0,365.0,260.0,100.0,2.0,2.0,1.0,0.0,2018.0,365.0,210.0,147.0,7.0,1.0,0.0,0.0
1016,Wyoming,Teton,2019,365,326,39,0,0,0,0,2009.0,365.0,327.0,36.0,1.0,1.0,0.0,0.0,2010.0,365.0,281.0,84.0,0.0,0.0,0.0,0.0,2011.0,365.0,275.0,90.0,0.0,0.0,0.0,0.0,2012.0,366.0,258.0,105.0,3.0,0.0,0.0,0.0,2013.0,365.0,310.0,55.0,0.0,0.0,0.0,0.0,2014.0,365.0,320.0,45.0,0.0,0.0,0.0,0.0,2015.0,365.0,325.0,39.0,1.0,0.0,0.0,0.0,2016.0,366.0,331.0,35.0,0.0,0.0,0.0,0.0,2017.0,365.0,299.0,65.0,1.0,0.0,0.0,0.0,2018.0,365.0,286.0,78.0,1.0,0.0,0.0,0.0
1017,Wyoming,Uinta,2019,365,315,49,1,0,0,0,2009.0,365.0,341.0,24.0,0.0,0.0,0.0,0.0,2010.0,364.0,286.0,76.0,2.0,0.0,0.0,0.0,2011.0,365.0,322.0,43.0,0.0,0.0,0.0,0.0,2012.0,366.0,254.0,111.0,1.0,0.0,0.0,0.0,2013.0,365.0,312.0,53.0,0.0,0.0,0.0,0.0,2014.0,365.0,331.0,34.0,0.0,0.0,0.0,0.0,2015.0,365.0,316.0,47.0,2.0,0.0,0.0,0.0,2016.0,366.0,341.0,25.0,0.0,0.0,0.0,0.0,2017.0,365.0,310.0,53.0,1.0,0.0,0.0,1.0,2018.0,365.0,287.0,77.0,1.0,0.0,0.0,0.0


In [38]:
decade_df.columns.tolist()

['State',
 'County',
 'Year_x',
 'Days_with_AQI_x',
 'Good_Days_x',
 'Moderate_Days_x',
 'Unhealthy_for_Sensitive_Groups_Days_x',
 'Unhealthy_Days_x',
 'Very_Unhealthy_Days_x',
 'Hazardous_Days_x',
 'Year_y',
 'Days_with_AQI_y',
 'Good_Days_y',
 'Moderate_Days_y',
 'Unhealthy_for_Sensitive_Groups_Days_y',
 'Unhealthy_Days_y',
 'Very_Unhealthy_Days_y',
 'Hazardous_Days_y',
 'Year_x',
 'Days_with_AQI_x',
 'Good_Days_x',
 'Moderate_Days_x',
 'Unhealthy_for_Sensitive_Groups_Days_x',
 'Unhealthy_Days_x',
 'Very_Unhealthy_Days_x',
 'Hazardous_Days_x',
 'Year_y',
 'Days_with_AQI_y',
 'Good_Days_y',
 'Moderate_Days_y',
 'Unhealthy_for_Sensitive_Groups_Days_y',
 'Unhealthy_Days_y',
 'Very_Unhealthy_Days_y',
 'Hazardous_Days_y',
 'Year_x',
 'Days_with_AQI_x',
 'Good_Days_x',
 'Moderate_Days_x',
 'Unhealthy_for_Sensitive_Groups_Days_x',
 'Unhealthy_Days_x',
 'Very_Unhealthy_Days_x',
 'Hazardous_Days_x',
 'Year_y',
 'Days_with_AQI_y',
 'Good_Days_y',
 'Moderate_Days_y',
 'Unhealthy_for_Sensitive_G

In [39]:
decade_df.columns = ['State',
              'County',
              'Year_2019',
              'Days_with_AQI_2019',
              'Good_Days_2019',
              'Moderate_Days_2019',
              'Unhealthy_for_Sensitive_Groups_Days_2019',
              'Unhealthy_Days_2019',
              'Very_Unhealthy_Days_2019',
              'Hazardous_Days_2019',
              'Year_2009',
              'Days_with_AQI_2009',
              'Good_Days_2009',
              'Moderate_Days_2009',
              'Unhealthy_for_Sensitive_Groups_Days_2009',
              'Unhealthy_Days_2009',
              'Very_Unhealthy_Days_2009',
              'Hazardous_Days_2009',
              'Year_2010',
              'Days_with_AQI_2010',
              'Good_Days_2010',
              'Moderate_Days_2010',
              'Unhealthy_for_Sensitive_Groups_Days_2010',
              'Unhealthy_Days_2010',
              'Very_Unhealthy_Days_2010',
              'Hazardous_Days_2010',
              'Year_2011',
              'Days_with_AQI_2011',
              'Good_Days_2011',
              'Moderate_Days_2011',
              'Unhealthy_for_Sensitive_Groups_Days_2011',
              'Unhealthy_Days_2011',
              'Very_Unhealthy_Days_2011',
              'Hazardous_Days_2011',
              'Year_2012',
              'Days_with_AQI_2012',
              'Good_Days_2012',
              'Moderate_Days_2012',
              'Unhealthy_for_Sensitive_Groups_Days_2012',
              'Unhealthy_Days_2012',
              'Very_Unhealthy_Days_2012',
              'Hazardous_Days_2012',
              'Year_2013',
              'Days_with_AQI_2013',
              'Good_Days_2013',
              'Moderate_Days_2013',
              'Unhealthy_for_Sensitive_Groups_Days_2013',
              'Unhealthy_Days_2013',
              'Very_Unhealthy_Days_2013',
              'Hazardous_Days_2013',
              'Year_2014',
              'Days_with_AQI_2014',
              'Good_Days_2014',
              'Moderate_Days_2014',
              'Unhealthy_for_Sensitive_Groups_Days_2014',
              'Unhealthy_Days_2014',
              'Very_Unhealthy_Days_2014',
              'Hazardous_Days_2014',
              'Year_2015',
              'Days_with_AQI_2015',
              'Good_Days_2015',
              'Moderate_Days_2015',
              'Unhealthy_for_Sensitive_Groups_Days_2015',
              'Unhealthy_Days_2015',
              'Very_Unhealthy_Days_2015',
              'Hazardous_Days_2015',
              'Year_2016',
              'Days_with_AQI_2016',
              'Good_Days_2016',
              'Moderate_Days_2016',
              'Unhealthy_for_Sensitive_Groups_Days_2016',
              'Unhealthy_Days_2016',
              'Very_Unhealthy_Days_2016',
              'Hazardous_Days_2016',
              'Year_2017',
              'Days_with_AQI_2017',
              'Good_Days_2017',
              'Moderate_Days_2017',
              'Unhealthy_for_Sensitive_Groups_Days_2017',
              'Unhealthy_Days_2017',
              'Very_Unhealthy_Days_2017',
              'Hazardous_Days_2017',
              'Year_2018',
              'Days_with_AQI_2018',
              'Good_Days_2018',
              'Moderate_Days_2018',
              'Unhealthy_for_Sensitive_Groups_Days_2018',
              'Unhealthy_Days_2018',
              'Very_Unhealthy_Days_2018',
              'Hazardous_Days_2018'
             ]

In [40]:
decade_df

Unnamed: 0,State,County,Year_2019,Days_with_AQI_2019,Good_Days_2019,Moderate_Days_2019,Unhealthy_for_Sensitive_Groups_Days_2019,Unhealthy_Days_2019,Very_Unhealthy_Days_2019,Hazardous_Days_2019,Year_2009,Days_with_AQI_2009,Good_Days_2009,Moderate_Days_2009,Unhealthy_for_Sensitive_Groups_Days_2009,Unhealthy_Days_2009,Very_Unhealthy_Days_2009,Hazardous_Days_2009,Year_2010,Days_with_AQI_2010,Good_Days_2010,Moderate_Days_2010,Unhealthy_for_Sensitive_Groups_Days_2010,Unhealthy_Days_2010,Very_Unhealthy_Days_2010,Hazardous_Days_2010,Year_2011,Days_with_AQI_2011,Good_Days_2011,Moderate_Days_2011,Unhealthy_for_Sensitive_Groups_Days_2011,Unhealthy_Days_2011,Very_Unhealthy_Days_2011,Hazardous_Days_2011,Year_2012,Days_with_AQI_2012,Good_Days_2012,Moderate_Days_2012,Unhealthy_for_Sensitive_Groups_Days_2012,Unhealthy_Days_2012,Very_Unhealthy_Days_2012,Hazardous_Days_2012,Year_2013,Days_with_AQI_2013,Good_Days_2013,Moderate_Days_2013,Unhealthy_for_Sensitive_Groups_Days_2013,Unhealthy_Days_2013,Very_Unhealthy_Days_2013,Hazardous_Days_2013,Year_2014,Days_with_AQI_2014,Good_Days_2014,Moderate_Days_2014,Unhealthy_for_Sensitive_Groups_Days_2014,Unhealthy_Days_2014,Very_Unhealthy_Days_2014,Hazardous_Days_2014,Year_2015,Days_with_AQI_2015,Good_Days_2015,Moderate_Days_2015,Unhealthy_for_Sensitive_Groups_Days_2015,Unhealthy_Days_2015,Very_Unhealthy_Days_2015,Hazardous_Days_2015,Year_2016,Days_with_AQI_2016,Good_Days_2016,Moderate_Days_2016,Unhealthy_for_Sensitive_Groups_Days_2016,Unhealthy_Days_2016,Very_Unhealthy_Days_2016,Hazardous_Days_2016,Year_2017,Days_with_AQI_2017,Good_Days_2017,Moderate_Days_2017,Unhealthy_for_Sensitive_Groups_Days_2017,Unhealthy_Days_2017,Very_Unhealthy_Days_2017,Hazardous_Days_2017,Year_2018,Days_with_AQI_2018,Good_Days_2018,Moderate_Days_2018,Unhealthy_for_Sensitive_Groups_Days_2018,Unhealthy_Days_2018,Very_Unhealthy_Days_2018,Hazardous_Days_2018
0,Alabama,Baldwin,2019,271,237,34,0,0,0,0,2009.0,252.0,218.0,32.0,2.0,0.0,0.0,0.0,2010.0,274.0,188.0,80.0,6.0,0.0,0.0,0.0,2011.0,282.0,204.0,69.0,9.0,0.0,0.0,0.0,2012.0,284.0,226.0,56.0,2.0,0.0,0.0,0.0,2013.0,273.0,235.0,38.0,0.0,0.0,0.0,0.0,2014.0,280.0,232.0,46.0,2.0,0.0,0.0,0.0,2015.0,264.0,230.0,33.0,1.0,0.0,0.0,0.0,2016.0,279.0,247.0,32.0,0.0,0.0,0.0,0.0,2017.0,270.0,241.0,28.0,1.0,0.0,0.0,0.0,2018.0,270.0,245.0,25.0,0.0,0.0,0.0,0.0
1,Alabama,Clay,2019,107,97,10,0,0,0,0,2009.0,119.0,97.0,22.0,0.0,0.0,0.0,0.0,2010.0,113.0,79.0,34.0,0.0,0.0,0.0,0.0,2011.0,118.0,80.0,38.0,0.0,0.0,0.0,0.0,2012.0,121.0,99.0,22.0,0.0,0.0,0.0,0.0,2013.0,118.0,100.0,18.0,0.0,0.0,0.0,0.0,2014.0,114.0,96.0,18.0,0.0,0.0,0.0,0.0,2015.0,112.0,101.0,11.0,0.0,0.0,0.0,0.0,2016.0,116.0,109.0,7.0,0.0,0.0,0.0,0.0,2017.0,118.0,104.0,14.0,0.0,0.0,0.0,0.0,2018.0,110.0,103.0,7.0,0.0,0.0,0.0,0.0
2,Alabama,Colbert,2019,263,252,11,0,0,0,0,2009.0,323.0,220.0,103.0,0.0,0.0,0.0,0.0,2010.0,355.0,208.0,146.0,1.0,0.0,0.0,0.0,2011.0,279.0,214.0,64.0,1.0,0.0,0.0,0.0,2012.0,283.0,222.0,55.0,6.0,0.0,0.0,0.0,2013.0,285.0,252.0,33.0,0.0,0.0,0.0,0.0,2014.0,284.0,251.0,33.0,0.0,0.0,0.0,0.0,2015.0,280.0,251.0,29.0,0.0,0.0,0.0,0.0,2016.0,282.0,258.0,23.0,1.0,0.0,0.0,0.0,2017.0,283.0,265.0,18.0,0.0,0.0,0.0,0.0,2018.0,277.0,251.0,26.0,0.0,0.0,0.0,0.0
3,Alabama,DeKalb,2019,361,324,37,0,0,0,0,2009.0,363.0,311.0,52.0,0.0,0.0,0.0,0.0,2010.0,358.0,248.0,106.0,4.0,0.0,0.0,0.0,2011.0,336.0,257.0,78.0,1.0,0.0,0.0,0.0,2012.0,361.0,282.0,74.0,5.0,0.0,0.0,0.0,2013.0,360.0,319.0,41.0,0.0,0.0,0.0,0.0,2014.0,360.0,313.0,47.0,0.0,0.0,0.0,0.0,2015.0,363.0,319.0,43.0,1.0,0.0,0.0,0.0,2016.0,348.0,304.0,43.0,1.0,0.0,0.0,0.0,2017.0,359.0,329.0,30.0,0.0,0.0,0.0,0.0,2018.0,350.0,316.0,34.0,0.0,0.0,0.0,0.0
4,Alabama,Elmore,2019,228,208,20,0,0,0,0,2009.0,244.0,228.0,16.0,0.0,0.0,0.0,0.0,2010.0,241.0,181.0,55.0,5.0,0.0,0.0,0.0,2011.0,242.0,192.0,48.0,2.0,0.0,0.0,0.0,2012.0,245.0,212.0,33.0,0.0,0.0,0.0,0.0,2013.0,244.0,229.0,15.0,0.0,0.0,0.0,0.0,2014.0,245.0,224.0,21.0,0.0,0.0,0.0,0.0,2015.0,233.0,223.0,9.0,1.0,0.0,0.0,0.0,2016.0,117.0,107.0,10.0,0.0,0.0,0.0,0.0,2017.0,226.0,221.0,5.0,0.0,0.0,0.0,0.0,2018.0,222.0,203.0,19.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1014,Wyoming,Sublette,2019,365,266,88,8,3,0,0,2009.0,365.0,277.0,88.0,0.0,0.0,0.0,0.0,2010.0,365.0,250.0,113.0,2.0,0.0,0.0,0.0,2011.0,365.0,214.0,134.0,12.0,2.0,3.0,0.0,2012.0,366.0,206.0,145.0,9.0,6.0,0.0,0.0,2013.0,365.0,282.0,83.0,0.0,0.0,0.0,0.0,2014.0,365.0,286.0,79.0,0.0,0.0,0.0,0.0,2015.0,365.0,297.0,67.0,1.0,0.0,0.0,0.0,2016.0,366.0,297.0,68.0,1.0,0.0,0.0,0.0,2017.0,365.0,228.0,129.0,8.0,0.0,0.0,0.0,2018.0,365.0,233.0,132.0,0.0,0.0,0.0,0.0
1015,Wyoming,Sweetwater,2019,365,262,100,3,0,0,0,2009.0,365.0,280.0,83.0,1.0,1.0,0.0,0.0,2010.0,365.0,255.0,107.0,3.0,0.0,0.0,0.0,2011.0,365.0,231.0,129.0,5.0,0.0,0.0,0.0,2012.0,366.0,216.0,142.0,7.0,0.0,1.0,0.0,2013.0,365.0,253.0,109.0,2.0,0.0,0.0,1.0,2014.0,365.0,247.0,110.0,6.0,2.0,0.0,0.0,2015.0,365.0,256.0,105.0,4.0,0.0,0.0,0.0,2016.0,366.0,304.0,62.0,0.0,0.0,0.0,0.0,2017.0,365.0,260.0,100.0,2.0,2.0,1.0,0.0,2018.0,365.0,210.0,147.0,7.0,1.0,0.0,0.0
1016,Wyoming,Teton,2019,365,326,39,0,0,0,0,2009.0,365.0,327.0,36.0,1.0,1.0,0.0,0.0,2010.0,365.0,281.0,84.0,0.0,0.0,0.0,0.0,2011.0,365.0,275.0,90.0,0.0,0.0,0.0,0.0,2012.0,366.0,258.0,105.0,3.0,0.0,0.0,0.0,2013.0,365.0,310.0,55.0,0.0,0.0,0.0,0.0,2014.0,365.0,320.0,45.0,0.0,0.0,0.0,0.0,2015.0,365.0,325.0,39.0,1.0,0.0,0.0,0.0,2016.0,366.0,331.0,35.0,0.0,0.0,0.0,0.0,2017.0,365.0,299.0,65.0,1.0,0.0,0.0,0.0,2018.0,365.0,286.0,78.0,1.0,0.0,0.0,0.0
1017,Wyoming,Uinta,2019,365,315,49,1,0,0,0,2009.0,365.0,341.0,24.0,0.0,0.0,0.0,0.0,2010.0,364.0,286.0,76.0,2.0,0.0,0.0,0.0,2011.0,365.0,322.0,43.0,0.0,0.0,0.0,0.0,2012.0,366.0,254.0,111.0,1.0,0.0,0.0,0.0,2013.0,365.0,312.0,53.0,0.0,0.0,0.0,0.0,2014.0,365.0,331.0,34.0,0.0,0.0,0.0,0.0,2015.0,365.0,316.0,47.0,2.0,0.0,0.0,0.0,2016.0,366.0,341.0,25.0,0.0,0.0,0.0,0.0,2017.0,365.0,310.0,53.0,1.0,0.0,0.0,1.0,2018.0,365.0,287.0,77.0,1.0,0.0,0.0,0.0


In [41]:
# Add the values of the merged columns
decade_df["Days_with_AQI"] = decade_df["Days_with_AQI_2019"] + decade_df["Days_with_AQI_2009"]
+ decade_df["Days_with_AQI_2010"] + decade_df["Days_with_AQI_2011"]
+ decade_df["Days_with_AQI_2012"] + decade_df["Days_with_AQI_2013"]
+ decade_df["Days_with_AQI_2014"] + decade_df["Days_with_AQI_2015"]
+ decade_df["Days_with_AQI_2016"] + decade_df["Days_with_AQI_2017"]
+ decade_df["Days_with_AQI_2018"]


# Drop the old AQI columns
decade_df = decade_df.drop(["Days_with_AQI_2019", "Days_with_AQI_2009",
                           "Days_with_AQI_2010", "Days_with_AQI_2011",
                           "Days_with_AQI_2012", "Days_with_AQI_2013",
                           "Days_with_AQI_2014", "Days_with_AQI_2015",
                           "Days_with_AQI_2016", "Days_with_AQI_2017",
                           "Days_with_AQI_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Year_2019,Good_Days_2019,Moderate_Days_2019,Unhealthy_for_Sensitive_Groups_Days_2019,Unhealthy_Days_2019,Very_Unhealthy_Days_2019,Hazardous_Days_2019,Year_2009,Good_Days_2009,Moderate_Days_2009,Unhealthy_for_Sensitive_Groups_Days_2009,Unhealthy_Days_2009,Very_Unhealthy_Days_2009,Hazardous_Days_2009,Year_2010,Good_Days_2010,Moderate_Days_2010,Unhealthy_for_Sensitive_Groups_Days_2010,Unhealthy_Days_2010,Very_Unhealthy_Days_2010,Hazardous_Days_2010,Year_2011,Good_Days_2011,Moderate_Days_2011,Unhealthy_for_Sensitive_Groups_Days_2011,Unhealthy_Days_2011,Very_Unhealthy_Days_2011,Hazardous_Days_2011,Year_2012,Good_Days_2012,Moderate_Days_2012,Unhealthy_for_Sensitive_Groups_Days_2012,Unhealthy_Days_2012,Very_Unhealthy_Days_2012,Hazardous_Days_2012,Year_2013,Good_Days_2013,Moderate_Days_2013,Unhealthy_for_Sensitive_Groups_Days_2013,Unhealthy_Days_2013,Very_Unhealthy_Days_2013,Hazardous_Days_2013,Year_2014,Good_Days_2014,Moderate_Days_2014,Unhealthy_for_Sensitive_Groups_Days_2014,Unhealthy_Days_2014,Very_Unhealthy_Days_2014,Hazardous_Days_2014,Year_2015,Good_Days_2015,Moderate_Days_2015,Unhealthy_for_Sensitive_Groups_Days_2015,Unhealthy_Days_2015,Very_Unhealthy_Days_2015,Hazardous_Days_2015,Year_2016,Good_Days_2016,Moderate_Days_2016,Unhealthy_for_Sensitive_Groups_Days_2016,Unhealthy_Days_2016,Very_Unhealthy_Days_2016,Hazardous_Days_2016,Year_2017,Good_Days_2017,Moderate_Days_2017,Unhealthy_for_Sensitive_Groups_Days_2017,Unhealthy_Days_2017,Very_Unhealthy_Days_2017,Hazardous_Days_2017,Year_2018,Good_Days_2018,Moderate_Days_2018,Unhealthy_for_Sensitive_Groups_Days_2018,Unhealthy_Days_2018,Very_Unhealthy_Days_2018,Hazardous_Days_2018,Days_with_AQI
0,Alabama,Baldwin,2019,237,34,0,0,0,0,2009.0,218.0,32.0,2.0,0.0,0.0,0.0,2010.0,188.0,80.0,6.0,0.0,0.0,0.0,2011.0,204.0,69.0,9.0,0.0,0.0,0.0,2012.0,226.0,56.0,2.0,0.0,0.0,0.0,2013.0,235.0,38.0,0.0,0.0,0.0,0.0,2014.0,232.0,46.0,2.0,0.0,0.0,0.0,2015.0,230.0,33.0,1.0,0.0,0.0,0.0,2016.0,247.0,32.0,0.0,0.0,0.0,0.0,2017.0,241.0,28.0,1.0,0.0,0.0,0.0,2018.0,245.0,25.0,0.0,0.0,0.0,0.0,523.0
1,Alabama,Clay,2019,97,10,0,0,0,0,2009.0,97.0,22.0,0.0,0.0,0.0,0.0,2010.0,79.0,34.0,0.0,0.0,0.0,0.0,2011.0,80.0,38.0,0.0,0.0,0.0,0.0,2012.0,99.0,22.0,0.0,0.0,0.0,0.0,2013.0,100.0,18.0,0.0,0.0,0.0,0.0,2014.0,96.0,18.0,0.0,0.0,0.0,0.0,2015.0,101.0,11.0,0.0,0.0,0.0,0.0,2016.0,109.0,7.0,0.0,0.0,0.0,0.0,2017.0,104.0,14.0,0.0,0.0,0.0,0.0,2018.0,103.0,7.0,0.0,0.0,0.0,0.0,226.0
2,Alabama,Colbert,2019,252,11,0,0,0,0,2009.0,220.0,103.0,0.0,0.0,0.0,0.0,2010.0,208.0,146.0,1.0,0.0,0.0,0.0,2011.0,214.0,64.0,1.0,0.0,0.0,0.0,2012.0,222.0,55.0,6.0,0.0,0.0,0.0,2013.0,252.0,33.0,0.0,0.0,0.0,0.0,2014.0,251.0,33.0,0.0,0.0,0.0,0.0,2015.0,251.0,29.0,0.0,0.0,0.0,0.0,2016.0,258.0,23.0,1.0,0.0,0.0,0.0,2017.0,265.0,18.0,0.0,0.0,0.0,0.0,2018.0,251.0,26.0,0.0,0.0,0.0,0.0,586.0
3,Alabama,DeKalb,2019,324,37,0,0,0,0,2009.0,311.0,52.0,0.0,0.0,0.0,0.0,2010.0,248.0,106.0,4.0,0.0,0.0,0.0,2011.0,257.0,78.0,1.0,0.0,0.0,0.0,2012.0,282.0,74.0,5.0,0.0,0.0,0.0,2013.0,319.0,41.0,0.0,0.0,0.0,0.0,2014.0,313.0,47.0,0.0,0.0,0.0,0.0,2015.0,319.0,43.0,1.0,0.0,0.0,0.0,2016.0,304.0,43.0,1.0,0.0,0.0,0.0,2017.0,329.0,30.0,0.0,0.0,0.0,0.0,2018.0,316.0,34.0,0.0,0.0,0.0,0.0,724.0
4,Alabama,Elmore,2019,208,20,0,0,0,0,2009.0,228.0,16.0,0.0,0.0,0.0,0.0,2010.0,181.0,55.0,5.0,0.0,0.0,0.0,2011.0,192.0,48.0,2.0,0.0,0.0,0.0,2012.0,212.0,33.0,0.0,0.0,0.0,0.0,2013.0,229.0,15.0,0.0,0.0,0.0,0.0,2014.0,224.0,21.0,0.0,0.0,0.0,0.0,2015.0,223.0,9.0,1.0,0.0,0.0,0.0,2016.0,107.0,10.0,0.0,0.0,0.0,0.0,2017.0,221.0,5.0,0.0,0.0,0.0,0.0,2018.0,203.0,19.0,0.0,0.0,0.0,0.0,472.0


In [42]:
# Add the values of the merged columns
decade_df["Good_Days"] = decade_df["Good_Days_2019"] + decade_df["Good_Days_2009"]
+ decade_df["Good_Days_2010"] + decade_df["Good_Days_2011"]
+ decade_df["Good_Days_2012"] + decade_df["Good_Days_2013"]
+ decade_df["Good_Days_2014"] + decade_df["Good_Days_2015"]
+ decade_df["Good_Days_2016"] + decade_df["Good_Days_2017"]
+ decade_df["Good_Days_2018"]


# Drop the old AQI columns
decade_df = decade_df.drop(["Good_Days_2019", "Good_Days_2009",
                           "Good_Days_2010", "Good_Days_2011",
                           "Good_Days_2012", "Good_Days_2013",
                           "Good_Days_2014", "Good_Days_2015",
                           "Good_Days_2016", "Good_Days_2017",
                           "Good_Days_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Year_2019,Moderate_Days_2019,Unhealthy_for_Sensitive_Groups_Days_2019,Unhealthy_Days_2019,Very_Unhealthy_Days_2019,Hazardous_Days_2019,Year_2009,Moderate_Days_2009,Unhealthy_for_Sensitive_Groups_Days_2009,Unhealthy_Days_2009,Very_Unhealthy_Days_2009,Hazardous_Days_2009,Year_2010,Moderate_Days_2010,Unhealthy_for_Sensitive_Groups_Days_2010,Unhealthy_Days_2010,Very_Unhealthy_Days_2010,Hazardous_Days_2010,Year_2011,Moderate_Days_2011,Unhealthy_for_Sensitive_Groups_Days_2011,Unhealthy_Days_2011,Very_Unhealthy_Days_2011,Hazardous_Days_2011,Year_2012,Moderate_Days_2012,Unhealthy_for_Sensitive_Groups_Days_2012,Unhealthy_Days_2012,Very_Unhealthy_Days_2012,Hazardous_Days_2012,Year_2013,Moderate_Days_2013,Unhealthy_for_Sensitive_Groups_Days_2013,Unhealthy_Days_2013,Very_Unhealthy_Days_2013,Hazardous_Days_2013,Year_2014,Moderate_Days_2014,Unhealthy_for_Sensitive_Groups_Days_2014,Unhealthy_Days_2014,Very_Unhealthy_Days_2014,Hazardous_Days_2014,Year_2015,Moderate_Days_2015,Unhealthy_for_Sensitive_Groups_Days_2015,Unhealthy_Days_2015,Very_Unhealthy_Days_2015,Hazardous_Days_2015,Year_2016,Moderate_Days_2016,Unhealthy_for_Sensitive_Groups_Days_2016,Unhealthy_Days_2016,Very_Unhealthy_Days_2016,Hazardous_Days_2016,Year_2017,Moderate_Days_2017,Unhealthy_for_Sensitive_Groups_Days_2017,Unhealthy_Days_2017,Very_Unhealthy_Days_2017,Hazardous_Days_2017,Year_2018,Moderate_Days_2018,Unhealthy_for_Sensitive_Groups_Days_2018,Unhealthy_Days_2018,Very_Unhealthy_Days_2018,Hazardous_Days_2018,Days_with_AQI,Good_Days
0,Alabama,Baldwin,2019,34,0,0,0,0,2009.0,32.0,2.0,0.0,0.0,0.0,2010.0,80.0,6.0,0.0,0.0,0.0,2011.0,69.0,9.0,0.0,0.0,0.0,2012.0,56.0,2.0,0.0,0.0,0.0,2013.0,38.0,0.0,0.0,0.0,0.0,2014.0,46.0,2.0,0.0,0.0,0.0,2015.0,33.0,1.0,0.0,0.0,0.0,2016.0,32.0,0.0,0.0,0.0,0.0,2017.0,28.0,1.0,0.0,0.0,0.0,2018.0,25.0,0.0,0.0,0.0,0.0,523.0,455.0
1,Alabama,Clay,2019,10,0,0,0,0,2009.0,22.0,0.0,0.0,0.0,0.0,2010.0,34.0,0.0,0.0,0.0,0.0,2011.0,38.0,0.0,0.0,0.0,0.0,2012.0,22.0,0.0,0.0,0.0,0.0,2013.0,18.0,0.0,0.0,0.0,0.0,2014.0,18.0,0.0,0.0,0.0,0.0,2015.0,11.0,0.0,0.0,0.0,0.0,2016.0,7.0,0.0,0.0,0.0,0.0,2017.0,14.0,0.0,0.0,0.0,0.0,2018.0,7.0,0.0,0.0,0.0,0.0,226.0,194.0
2,Alabama,Colbert,2019,11,0,0,0,0,2009.0,103.0,0.0,0.0,0.0,0.0,2010.0,146.0,1.0,0.0,0.0,0.0,2011.0,64.0,1.0,0.0,0.0,0.0,2012.0,55.0,6.0,0.0,0.0,0.0,2013.0,33.0,0.0,0.0,0.0,0.0,2014.0,33.0,0.0,0.0,0.0,0.0,2015.0,29.0,0.0,0.0,0.0,0.0,2016.0,23.0,1.0,0.0,0.0,0.0,2017.0,18.0,0.0,0.0,0.0,0.0,2018.0,26.0,0.0,0.0,0.0,0.0,586.0,472.0
3,Alabama,DeKalb,2019,37,0,0,0,0,2009.0,52.0,0.0,0.0,0.0,0.0,2010.0,106.0,4.0,0.0,0.0,0.0,2011.0,78.0,1.0,0.0,0.0,0.0,2012.0,74.0,5.0,0.0,0.0,0.0,2013.0,41.0,0.0,0.0,0.0,0.0,2014.0,47.0,0.0,0.0,0.0,0.0,2015.0,43.0,1.0,0.0,0.0,0.0,2016.0,43.0,1.0,0.0,0.0,0.0,2017.0,30.0,0.0,0.0,0.0,0.0,2018.0,34.0,0.0,0.0,0.0,0.0,724.0,635.0
4,Alabama,Elmore,2019,20,0,0,0,0,2009.0,16.0,0.0,0.0,0.0,0.0,2010.0,55.0,5.0,0.0,0.0,0.0,2011.0,48.0,2.0,0.0,0.0,0.0,2012.0,33.0,0.0,0.0,0.0,0.0,2013.0,15.0,0.0,0.0,0.0,0.0,2014.0,21.0,0.0,0.0,0.0,0.0,2015.0,9.0,1.0,0.0,0.0,0.0,2016.0,10.0,0.0,0.0,0.0,0.0,2017.0,5.0,0.0,0.0,0.0,0.0,2018.0,19.0,0.0,0.0,0.0,0.0,472.0,436.0


In [43]:
# Add the values of the merged columns
decade_df["Moderate_Days"] = decade_df["Moderate_Days_2019"] + decade_df["Moderate_Days_2009"]
+ decade_df["Moderate_Days_2010"] + decade_df["Moderate_Days_2011"]
+ decade_df["Moderate_Days_2012"] + decade_df["Moderate_Days_2013"]
+ decade_df["Moderate_Days_2014"] + decade_df["Moderate_Days_2015"]
+ decade_df["Moderate_Days_2016"] + decade_df["Moderate_Days_2017"]
+ decade_df["Moderate_Days_2018"]


# Drop the old AQI columns
decade_df = decade_df.drop(["Moderate_Days_2019", "Moderate_Days_2009",
                           "Moderate_Days_2010", "Moderate_Days_2011",
                           "Moderate_Days_2012", "Moderate_Days_2013",
                           "Moderate_Days_2014", "Moderate_Days_2015",
                           "Moderate_Days_2016", "Moderate_Days_2017",
                           "Moderate_Days_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Year_2019,Unhealthy_for_Sensitive_Groups_Days_2019,Unhealthy_Days_2019,Very_Unhealthy_Days_2019,Hazardous_Days_2019,Year_2009,Unhealthy_for_Sensitive_Groups_Days_2009,Unhealthy_Days_2009,Very_Unhealthy_Days_2009,Hazardous_Days_2009,Year_2010,Unhealthy_for_Sensitive_Groups_Days_2010,Unhealthy_Days_2010,Very_Unhealthy_Days_2010,Hazardous_Days_2010,Year_2011,Unhealthy_for_Sensitive_Groups_Days_2011,Unhealthy_Days_2011,Very_Unhealthy_Days_2011,Hazardous_Days_2011,Year_2012,Unhealthy_for_Sensitive_Groups_Days_2012,Unhealthy_Days_2012,Very_Unhealthy_Days_2012,Hazardous_Days_2012,Year_2013,Unhealthy_for_Sensitive_Groups_Days_2013,Unhealthy_Days_2013,Very_Unhealthy_Days_2013,Hazardous_Days_2013,Year_2014,Unhealthy_for_Sensitive_Groups_Days_2014,Unhealthy_Days_2014,Very_Unhealthy_Days_2014,Hazardous_Days_2014,Year_2015,Unhealthy_for_Sensitive_Groups_Days_2015,Unhealthy_Days_2015,Very_Unhealthy_Days_2015,Hazardous_Days_2015,Year_2016,Unhealthy_for_Sensitive_Groups_Days_2016,Unhealthy_Days_2016,Very_Unhealthy_Days_2016,Hazardous_Days_2016,Year_2017,Unhealthy_for_Sensitive_Groups_Days_2017,Unhealthy_Days_2017,Very_Unhealthy_Days_2017,Hazardous_Days_2017,Year_2018,Unhealthy_for_Sensitive_Groups_Days_2018,Unhealthy_Days_2018,Very_Unhealthy_Days_2018,Hazardous_Days_2018,Days_with_AQI,Good_Days,Moderate_Days
0,Alabama,Baldwin,2019,0,0,0,0,2009.0,2.0,0.0,0.0,0.0,2010.0,6.0,0.0,0.0,0.0,2011.0,9.0,0.0,0.0,0.0,2012.0,2.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,0.0,2014.0,2.0,0.0,0.0,0.0,2015.0,1.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,0.0,2017.0,1.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,0.0,523.0,455.0,66.0
1,Alabama,Clay,2019,0,0,0,0,2009.0,0.0,0.0,0.0,0.0,2010.0,0.0,0.0,0.0,0.0,2011.0,0.0,0.0,0.0,0.0,2012.0,0.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,0.0,226.0,194.0,32.0
2,Alabama,Colbert,2019,0,0,0,0,2009.0,0.0,0.0,0.0,0.0,2010.0,1.0,0.0,0.0,0.0,2011.0,1.0,0.0,0.0,0.0,2012.0,6.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,0.0,2016.0,1.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,0.0,586.0,472.0,114.0
3,Alabama,DeKalb,2019,0,0,0,0,2009.0,0.0,0.0,0.0,0.0,2010.0,4.0,0.0,0.0,0.0,2011.0,1.0,0.0,0.0,0.0,2012.0,5.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,0.0,2015.0,1.0,0.0,0.0,0.0,2016.0,1.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,0.0,724.0,635.0,89.0
4,Alabama,Elmore,2019,0,0,0,0,2009.0,0.0,0.0,0.0,0.0,2010.0,5.0,0.0,0.0,0.0,2011.0,2.0,0.0,0.0,0.0,2012.0,0.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,0.0,2015.0,1.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,0.0,472.0,436.0,36.0


In [44]:
# Add the values of the merged columns
decade_df["Unhealthy_for_Sensitive_Groups_Days"] = decade_df["Unhealthy_for_Sensitive_Groups_Days_2019"] + decade_df["Unhealthy_for_Sensitive_Groups_Days_2009"]
+ decade_df["Unhealthy_for_Sensitive_Groups_Days_2010"] + decade_df["Unhealthy_for_Sensitive_Groups_Days_2011"]
+ decade_df["Unhealthy_for_Sensitive_Groups_Days_2012"] + decade_df["Unhealthy_for_Sensitive_Groups_Days_2013"]
+ decade_df["Unhealthy_for_Sensitive_Groups_Days_2014"] + decade_df["Unhealthy_for_Sensitive_Groups_Days_2015"]
+ decade_df["Unhealthy_for_Sensitive_Groups_Days_2016"] + decade_df["Unhealthy_for_Sensitive_Groups_Days_2017"]
+ decade_df["Unhealthy_for_Sensitive_Groups_Days_2018"]


# Drop the old AQI columns
decade_df = decade_df.drop(["Unhealthy_for_Sensitive_Groups_Days_2019", "Unhealthy_for_Sensitive_Groups_Days_2009",
                           "Unhealthy_for_Sensitive_Groups_Days_2010", "Unhealthy_for_Sensitive_Groups_Days_2011",
                           "Unhealthy_for_Sensitive_Groups_Days_2012", "Unhealthy_for_Sensitive_Groups_Days_2013",
                           "Unhealthy_for_Sensitive_Groups_Days_2014", "Unhealthy_for_Sensitive_Groups_Days_2015",
                           "Unhealthy_for_Sensitive_Groups_Days_2016", "Unhealthy_for_Sensitive_Groups_Days_2017",
                           "Unhealthy_for_Sensitive_Groups_Days_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Year_2019,Unhealthy_Days_2019,Very_Unhealthy_Days_2019,Hazardous_Days_2019,Year_2009,Unhealthy_Days_2009,Very_Unhealthy_Days_2009,Hazardous_Days_2009,Year_2010,Unhealthy_Days_2010,Very_Unhealthy_Days_2010,Hazardous_Days_2010,Year_2011,Unhealthy_Days_2011,Very_Unhealthy_Days_2011,Hazardous_Days_2011,Year_2012,Unhealthy_Days_2012,Very_Unhealthy_Days_2012,Hazardous_Days_2012,Year_2013,Unhealthy_Days_2013,Very_Unhealthy_Days_2013,Hazardous_Days_2013,Year_2014,Unhealthy_Days_2014,Very_Unhealthy_Days_2014,Hazardous_Days_2014,Year_2015,Unhealthy_Days_2015,Very_Unhealthy_Days_2015,Hazardous_Days_2015,Year_2016,Unhealthy_Days_2016,Very_Unhealthy_Days_2016,Hazardous_Days_2016,Year_2017,Unhealthy_Days_2017,Very_Unhealthy_Days_2017,Hazardous_Days_2017,Year_2018,Unhealthy_Days_2018,Very_Unhealthy_Days_2018,Hazardous_Days_2018,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days
0,Alabama,Baldwin,2019,0,0,0,2009.0,0.0,0.0,0.0,2010.0,0.0,0.0,0.0,2011.0,0.0,0.0,0.0,2012.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,523.0,455.0,66.0,2.0
1,Alabama,Clay,2019,0,0,0,2009.0,0.0,0.0,0.0,2010.0,0.0,0.0,0.0,2011.0,0.0,0.0,0.0,2012.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,226.0,194.0,32.0,0.0
2,Alabama,Colbert,2019,0,0,0,2009.0,0.0,0.0,0.0,2010.0,0.0,0.0,0.0,2011.0,0.0,0.0,0.0,2012.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,586.0,472.0,114.0,0.0
3,Alabama,DeKalb,2019,0,0,0,2009.0,0.0,0.0,0.0,2010.0,0.0,0.0,0.0,2011.0,0.0,0.0,0.0,2012.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,724.0,635.0,89.0,0.0
4,Alabama,Elmore,2019,0,0,0,2009.0,0.0,0.0,0.0,2010.0,0.0,0.0,0.0,2011.0,0.0,0.0,0.0,2012.0,0.0,0.0,0.0,2013.0,0.0,0.0,0.0,2014.0,0.0,0.0,0.0,2015.0,0.0,0.0,0.0,2016.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,2018.0,0.0,0.0,0.0,472.0,436.0,36.0,0.0


In [45]:
# Add the values of the merged columns
decade_df["Unhealthy_Days"] = decade_df["Unhealthy_Days_2019"] + decade_df["Unhealthy_Days_2009"]
+ decade_df["Unhealthy_Days_2010"] + decade_df["Unhealthy_Days_2011"]
+ decade_df["Unhealthy_Days_2012"] + decade_df["Unhealthy_Days_2013"]
+ decade_df["Unhealthy_Days_2014"] + decade_df["Unhealthy_Days_2015"]
+ decade_df["Unhealthy_Days_2016"] + decade_df["Unhealthy_Days_2017"]
+ decade_df["Unhealthy_Days_2018"]


# Drop the old AQI columns
decade_df = decade_df.drop(["Unhealthy_Days_2019", "Unhealthy_Days_2009",
                           "Unhealthy_Days_2010", "Unhealthy_Days_2011",
                           "Unhealthy_Days_2012", "Unhealthy_Days_2013",
                           "Unhealthy_Days_2014", "Unhealthy_Days_2015",
                           "Unhealthy_Days_2016", "Unhealthy_Days_2017",
                           "Unhealthy_Days_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Year_2019,Very_Unhealthy_Days_2019,Hazardous_Days_2019,Year_2009,Very_Unhealthy_Days_2009,Hazardous_Days_2009,Year_2010,Very_Unhealthy_Days_2010,Hazardous_Days_2010,Year_2011,Very_Unhealthy_Days_2011,Hazardous_Days_2011,Year_2012,Very_Unhealthy_Days_2012,Hazardous_Days_2012,Year_2013,Very_Unhealthy_Days_2013,Hazardous_Days_2013,Year_2014,Very_Unhealthy_Days_2014,Hazardous_Days_2014,Year_2015,Very_Unhealthy_Days_2015,Hazardous_Days_2015,Year_2016,Very_Unhealthy_Days_2016,Hazardous_Days_2016,Year_2017,Very_Unhealthy_Days_2017,Hazardous_Days_2017,Year_2018,Very_Unhealthy_Days_2018,Hazardous_Days_2018,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days
0,Alabama,Baldwin,2019,0,0,2009.0,0.0,0.0,2010.0,0.0,0.0,2011.0,0.0,0.0,2012.0,0.0,0.0,2013.0,0.0,0.0,2014.0,0.0,0.0,2015.0,0.0,0.0,2016.0,0.0,0.0,2017.0,0.0,0.0,2018.0,0.0,0.0,523.0,455.0,66.0,2.0,0.0
1,Alabama,Clay,2019,0,0,2009.0,0.0,0.0,2010.0,0.0,0.0,2011.0,0.0,0.0,2012.0,0.0,0.0,2013.0,0.0,0.0,2014.0,0.0,0.0,2015.0,0.0,0.0,2016.0,0.0,0.0,2017.0,0.0,0.0,2018.0,0.0,0.0,226.0,194.0,32.0,0.0,0.0
2,Alabama,Colbert,2019,0,0,2009.0,0.0,0.0,2010.0,0.0,0.0,2011.0,0.0,0.0,2012.0,0.0,0.0,2013.0,0.0,0.0,2014.0,0.0,0.0,2015.0,0.0,0.0,2016.0,0.0,0.0,2017.0,0.0,0.0,2018.0,0.0,0.0,586.0,472.0,114.0,0.0,0.0
3,Alabama,DeKalb,2019,0,0,2009.0,0.0,0.0,2010.0,0.0,0.0,2011.0,0.0,0.0,2012.0,0.0,0.0,2013.0,0.0,0.0,2014.0,0.0,0.0,2015.0,0.0,0.0,2016.0,0.0,0.0,2017.0,0.0,0.0,2018.0,0.0,0.0,724.0,635.0,89.0,0.0,0.0
4,Alabama,Elmore,2019,0,0,2009.0,0.0,0.0,2010.0,0.0,0.0,2011.0,0.0,0.0,2012.0,0.0,0.0,2013.0,0.0,0.0,2014.0,0.0,0.0,2015.0,0.0,0.0,2016.0,0.0,0.0,2017.0,0.0,0.0,2018.0,0.0,0.0,472.0,436.0,36.0,0.0,0.0


In [46]:
# Add the values of the merged columns
decade_df["Very_Unhealthy_Days"] = decade_df["Very_Unhealthy_Days_2019"] + decade_df["Very_Unhealthy_Days_2009"]
+ decade_df["Very_Unhealthy_Days_2010"] + decade_df["Very_Unhealthy_Days_2011"]
+ decade_df["Very_Unhealthy_Days_2012"] + decade_df["Very_Unhealthy_Days_2013"]
+ decade_df["Very_Unhealthy_Days_2014"] + decade_df["Very_Unhealthy_Days_2015"]
+ decade_df["Very_Unhealthy_Days_2016"] + decade_df["Very_Unhealthy_Days_2017"]
+ decade_df["Very_Unhealthy_Days_2018"]


# Drop the old AQI columns
decade_df = decade_df.drop(["Very_Unhealthy_Days_2019", "Very_Unhealthy_Days_2009",
                           "Very_Unhealthy_Days_2010", "Very_Unhealthy_Days_2011",
                           "Very_Unhealthy_Days_2012", "Very_Unhealthy_Days_2013",
                           "Very_Unhealthy_Days_2014", "Very_Unhealthy_Days_2015",
                           "Very_Unhealthy_Days_2016", "Very_Unhealthy_Days_2017",
                           "Very_Unhealthy_Days_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Year_2019,Hazardous_Days_2019,Year_2009,Hazardous_Days_2009,Year_2010,Hazardous_Days_2010,Year_2011,Hazardous_Days_2011,Year_2012,Hazardous_Days_2012,Year_2013,Hazardous_Days_2013,Year_2014,Hazardous_Days_2014,Year_2015,Hazardous_Days_2015,Year_2016,Hazardous_Days_2016,Year_2017,Hazardous_Days_2017,Year_2018,Hazardous_Days_2018,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days
0,Alabama,Baldwin,2019,0,2009.0,0.0,2010.0,0.0,2011.0,0.0,2012.0,0.0,2013.0,0.0,2014.0,0.0,2015.0,0.0,2016.0,0.0,2017.0,0.0,2018.0,0.0,523.0,455.0,66.0,2.0,0.0,0.0
1,Alabama,Clay,2019,0,2009.0,0.0,2010.0,0.0,2011.0,0.0,2012.0,0.0,2013.0,0.0,2014.0,0.0,2015.0,0.0,2016.0,0.0,2017.0,0.0,2018.0,0.0,226.0,194.0,32.0,0.0,0.0,0.0
2,Alabama,Colbert,2019,0,2009.0,0.0,2010.0,0.0,2011.0,0.0,2012.0,0.0,2013.0,0.0,2014.0,0.0,2015.0,0.0,2016.0,0.0,2017.0,0.0,2018.0,0.0,586.0,472.0,114.0,0.0,0.0,0.0
3,Alabama,DeKalb,2019,0,2009.0,0.0,2010.0,0.0,2011.0,0.0,2012.0,0.0,2013.0,0.0,2014.0,0.0,2015.0,0.0,2016.0,0.0,2017.0,0.0,2018.0,0.0,724.0,635.0,89.0,0.0,0.0,0.0
4,Alabama,Elmore,2019,0,2009.0,0.0,2010.0,0.0,2011.0,0.0,2012.0,0.0,2013.0,0.0,2014.0,0.0,2015.0,0.0,2016.0,0.0,2017.0,0.0,2018.0,0.0,472.0,436.0,36.0,0.0,0.0,0.0


In [47]:
# Add the values of the merged columns
decade_df["Hazardous_Days"] = decade_df["Hazardous_Days_2019"] + decade_df["Hazardous_Days_2009"]
+ decade_df["Hazardous_Days_2010"] + decade_df["Hazardous_Days_2011"]
+ decade_df["Hazardous_Days_2012"] + decade_df["Hazardous_Days_2013"]
+ decade_df["Hazardous_Days_2014"] + decade_df["Hazardous_Days_2015"]
+ decade_df["Hazardous_Days_2016"] + decade_df["Hazardous_Days_2017"]
+ decade_df["Hazardous_Days_2018"]


# Drop the old AQI columns
decade_df = decade_df.drop(["Hazardous_Days_2019", "Hazardous_Days_2009",
                           "Hazardous_Days_2010", "Hazardous_Days_2011",
                           "Hazardous_Days_2012", "Hazardous_Days_2013",
                           "Hazardous_Days_2014", "Hazardous_Days_2015",
                           "Hazardous_Days_2016", "Hazardous_Days_2017",
                           "Hazardous_Days_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Year_2019,Year_2009,Year_2010,Year_2011,Year_2012,Year_2013,Year_2014,Year_2015,Year_2016,Year_2017,Year_2018,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous_Days
0,Alabama,Baldwin,2019,2009.0,2010.0,2011.0,2012.0,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,523.0,455.0,66.0,2.0,0.0,0.0,0.0
1,Alabama,Clay,2019,2009.0,2010.0,2011.0,2012.0,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,226.0,194.0,32.0,0.0,0.0,0.0,0.0
2,Alabama,Colbert,2019,2009.0,2010.0,2011.0,2012.0,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,586.0,472.0,114.0,0.0,0.0,0.0,0.0
3,Alabama,DeKalb,2019,2009.0,2010.0,2011.0,2012.0,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,724.0,635.0,89.0,0.0,0.0,0.0,0.0
4,Alabama,Elmore,2019,2009.0,2010.0,2011.0,2012.0,2013.0,2014.0,2015.0,2016.0,2017.0,2018.0,472.0,436.0,36.0,0.0,0.0,0.0,0.0


In [48]:
# Drop the year columns
decade_df = decade_df.drop(["Year_2019", "Year_2009",
                           "Year_2010", "Year_2011",
                           "Year_2012", "Year_2013",
                           "Year_2014", "Year_2015",
                           "Year_2016", "Year_2017",
                           "Year_2018"], axis=1)

# View the dataframe
decade_df.head()

Unnamed: 0,State,County,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous_Days
0,Alabama,Baldwin,523.0,455.0,66.0,2.0,0.0,0.0,0.0
1,Alabama,Clay,226.0,194.0,32.0,0.0,0.0,0.0,0.0
2,Alabama,Colbert,586.0,472.0,114.0,0.0,0.0,0.0,0.0
3,Alabama,DeKalb,724.0,635.0,89.0,0.0,0.0,0.0,0.0
4,Alabama,Elmore,472.0,436.0,36.0,0.0,0.0,0.0,0.0


In [49]:
# Check that datatypes are correct
decade_df.dtypes

State                                   object
County                                  object
Days_with_AQI                          float64
Good_Days                              float64
Moderate_Days                          float64
Unhealthy_for_Sensitive_Groups_Days    float64
Unhealthy_Days                         float64
Very_Unhealthy_Days                    float64
Hazardous_Days                         float64
dtype: object

In [50]:
# Check for null values
decade_df.isnull().sum()

State                                   0
County                                  0
Days_with_AQI                          54
Good_Days                              54
Moderate_Days                          54
Unhealthy_for_Sensitive_Groups_Days    54
Unhealthy_Days                         54
Very_Unhealthy_Days                    54
Hazardous_Days                         54
dtype: int64

In [51]:
# Replace null values with 0
decade_df = decade_df.fillna(0)

# Check for null values
decade_df.isnull().sum()

State                                  0
County                                 0
Days_with_AQI                          0
Good_Days                              0
Moderate_Days                          0
Unhealthy_for_Sensitive_Groups_Days    0
Unhealthy_Days                         0
Very_Unhealthy_Days                    0
Hazardous_Days                         0
dtype: int64

In [52]:
# Create State_County column
decade_df["State_County"] = decade_df["State"].astype(str) + ", "+ decade_df["County"]



In [54]:
# Reorder the dataframe
decade_df.columns = ['State_County',
                     'State',
                     'County',
                     'Days_with_AQI',
                     'Good_Days',
                     'Moderate_Days',
                     'Unhealthy_for_Sensitive_Groups_Days',
                     'Unhealthy_Days',
                     'Very_Unhealthy_Days',
                     'Hazardous_Days'
                    ]

# View the data
decade_df

Unnamed: 0,State_County,State,County,Days_with_AQI,Good_Days,Moderate_Days,Unhealthy_for_Sensitive_Groups_Days,Unhealthy_Days,Very_Unhealthy_Days,Hazardous_Days
0,Alabama,Baldwin,523.0,455.0,66.0,2.0,0.0,0.0,0.0,"Alabama, Baldwin"
1,Alabama,Clay,226.0,194.0,32.0,0.0,0.0,0.0,0.0,"Alabama, Clay"
2,Alabama,Colbert,586.0,472.0,114.0,0.0,0.0,0.0,0.0,"Alabama, Colbert"
3,Alabama,DeKalb,724.0,635.0,89.0,0.0,0.0,0.0,0.0,"Alabama, DeKalb"
4,Alabama,Elmore,472.0,436.0,36.0,0.0,0.0,0.0,0.0,"Alabama, Elmore"
...,...,...,...,...,...,...,...,...,...,...
1014,Wyoming,Sublette,730.0,543.0,176.0,8.0,3.0,0.0,0.0,"Wyoming, Sublette"
1015,Wyoming,Sweetwater,730.0,542.0,183.0,4.0,1.0,0.0,0.0,"Wyoming, Sweetwater"
1016,Wyoming,Teton,730.0,653.0,75.0,1.0,1.0,0.0,0.0,"Wyoming, Teton"
1017,Wyoming,Uinta,730.0,656.0,73.0,1.0,0.0,0.0,0.0,"Wyoming, Uinta"


In [55]:
# Export the dataframe to csv
decade_df.to_csv('./Resources/processed_Decade_Air_Quality.csv')