# Clean Air Vehicles vs Air Pollution by State

### Imports

In [2]:
##  Imports

import pandas as pd
import numpy as np
import csv
import plotly.graph_objects as go
import plotly.express as px
import os
import logging
from dash import Dash, dcc, html, Input, Output


## Define states  

In [1]:
# Two-letter USPS codes for the 50 states plus the District of Columbia,
# in alphabetical order by code.
states = (
    "AK AL AR AZ CA CO CT DC DE FL GA "
    "HI IA ID IL IN KS KY LA MA MD ME "
    "MI MN MO MS MT NC ND NE NH NJ NM "
    "NV NY OH OK OR PA RI SC SD TN TX "
    "UT VA VT WA WI WV WY"
).split()

# Mapping keyed by state code; only the commented-out helper code near the end
# of the notebook ever populates it.
state_dfs = dict()



### State-to-abbreviation dictionary (borrowed from GitHub)

In [4]:
# United States of America Python Dictionary to translate States,
# Districts & Territories to Two-Letter codes and vice versa.
#
# Canonical URL: https://gist.github.com/rogerallen/1583593
#
# Dedicated to the public domain.  To the extent possible under law,
# Roger Allen has waived all copyright and related or neighboring
# rights to this code.  Data originally from Wikipedia at the url:
# https://en.wikipedia.org/wiki/ISO_3166-2:US
#
# Automatically Generated 2021-09-11 18:04:36 via Jupyter Notebook from
# https://gist.github.com/rogerallen/d75440e8e5ea4762374dfd5c1ddf84e0

# Full name -> two-letter code.
us_state_to_abbrev = {
    # The 50 states
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    # Federal district
    "District of Columbia": "DC",
    # Territories
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# Two-letter code -> full name (the mapping above, inverted).
abbrev_to_us_state = {
    abbrev: full_name for full_name, abbrev in us_state_to_abbrev.items()
}

## Paths for all files and column names for each year

In [5]:
# Locations of the raw NEI onroad CSV extracts and, for each inventory year,
# the header names used by that year's files.  The headers changed between
# releases, so every lookup below is keyed by year.

# Not read by the main transform; only the commented-out exploratory cells at
# the end of the notebook refer to it.
pollutants = []

# Folder holding the NEI CSV files.  Set the NEI_DATA_DIR environment variable
# to run the notebook on another machine; the default is the original location.
NEI_DATA_DIR = os.environ.get(
    "NEI_DATA_DIR",
    "/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data",
)

# File-name suffixes of the five files that make up one inventory year
# (presumably EPA region groups -- TODO confirm).
_ONROAD_FILE_GROUPS = ["123", "4", "5", "67", "8910"]


def _onroad_file_name(year, group):
    """Return the CSV file name for one inventory year and file group."""
    # The 2008 release uses a different naming scheme from the later years.
    if year == 2008:
        return f"2008NEIv3_onroad{group}.csv"
    return f"{year}_nei_onroad_{group}.csv"


# year -> list of the five CSV paths for that year.
onroad_paths = {
    year: [
        os.path.join(NEI_DATA_DIR, _onroad_file_name(year, group))
        for group in _ONROAD_FILE_GROUPS
    ]
    for year in (2020, 2017, 2014, 2011, 2008)
}

# year -> name of the column holding the two-letter state code.
state_column = {
    2008: "st_usps_cd",
    2011: "st_usps_cd",
    2014: "st_usps_cd",
    2017: "state",
    2020: "state",
}

# year -> name of the column holding the pollutant description.
pol_descr_column = {
    2008: "description",
    2011: "description",
    2014: "pollutant_desc",
    2017: "pollutant desc",
    2020: "pollutant desc",
}

# year -> name of the column holding the total emissions value.
tot_em_column = {
    2008: "total_emissions",
    2011: "total_emissions",
    2014: "total_emissions",
    2017: "total emissions",
    2020: "total emissions",
}

# year -> name of the column holding the unit of measure.
uom_column = {
    2008: "uom",
    2011: "uom",
    2014: "uom",
    2017: "emissions uom",
    2020: "emissions uom",
}



In [6]:
# Pollutants of concern, described once in a single table.  Each row is
# (NEI pollutant description, display symbol, unit of measure, concern group);
# the five lookups used by the rest of the notebook are derived from it.
_POLLUTANT_TABLE = [
    ('Carbon Dioxide',              'CO2',         'Tons', 'GHG'),
    ('Carbon Monoxide',             'CO',          'Tons', 'GHG'),
    ('Chromium (VI)',               'Cr VI',       "Lb's", 'Health'),
    ('Manganese',                   'Mn',          "Lb's", 'Health'),
    ('Mercury',                     'Hg',          "Lb's", 'Health'),
    ('Methane',                     'CH4',         'Tons', 'GHG'),
    ('Nickel',                      'Ni',          "Lb's", 'Health'),
    ('Nitrogen Oxides',             'NOx',         'Tons', 'Health'),
    ('Nitrous Oxide',               'N2O',         'Tons', 'GHG'),
    ('PM10 Primary (Filt + Cond)',  'PM10 Prime',  'Tons', 'Health'),
    ('PM2.5 Primary (Filt + Cond)', 'PM2.5 Prime', 'Tons', 'Health'),
    ('Sulfur Dioxide',              'SO2',         'Tons', 'Health'),
    ('Volatile Organic Compounds',  'VOC',         'Tons', 'Health'),
]

# All pollutant descriptions, in table order.
poc = [name for name, _symbol, _unit, _group in _POLLUTANT_TABLE]

# Pollutant description -> short symbol.
poc_symbols = {name: symbol for name, symbol, _unit, _group in _POLLUTANT_TABLE}

# Pollutant description -> unit of measure.
poc_uom = {name: unit for name, _symbol, unit, _group in _POLLUTANT_TABLE}

# Pollutants grouped as health concerns.
poc_health = [name for name, _symbol, _unit, group in _POLLUTANT_TABLE if group == 'Health']

# Pollutants grouped as greenhouse gases.
poc_GHG = [name for name, _symbol, _unit, group in _POLLUTANT_TABLE if group == 'GHG']

In [7]:
# Clean Air Vehicle (CAV) adoption leaders and laggards.  The "top 10" list
# holds 11 states because two of them were virtually tied.
top_10_CAV_states = ['CA', 'WA', 'OR', 'HI', 'VT', 'MA', 'MD', 'CO', 'NV', 'VA', 'AZ']
bottom_10_CAV_states = ['MI', 'ND', 'LA', 'WY', 'AL', 'SD', 'WV', 'AR', 'OK', 'KY']

# All 21 states studied: the leaders followed by the laggards.  Derived from
# the two lists above so the three lists always agree ('AZ' included).
top_and_bottom_CAV_states = top_10_CAV_states + bottom_10_CAV_states

# NEI inventory years analysed, oldest first.
years = [2008, 2011, 2014, 2017, 2020]



In [8]:
# Load the U.S. population estimates table.  Later cells look rows up by the
# "NAME" column and read the "POPESTIMATE<year>" columns.
US_pop = pd.read_csv(
    filepath_or_buffer='/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/US_pop_census - 2000_to_2020.csv',
    low_memory=False,
)


In [129]:
# Sanity check of the population lookup used by the main transform: select one
# state's row by name, read one year's estimate, and show the value and its type.
is_california = US_pop["NAME"] == "California"
pop = US_pop.loc[is_california, "POPESTIMATE2000"]

first_estimate = pop.iloc[0]
print(first_estimate)
print(type(first_estimate))

33987977
<class 'numpy.int64'>


## Main script for transforming NEI raw data to desired data frame

In [131]:
# Build the per-state, per-pollutant, per-year emissions table from the raw NEI
# onroad files, then write it (plus its high- and low-CAV-adoption subsets) to CSV.

# Column layout of the three output tables.  The spellings (including
# "Emmisions_per_100k") are kept as-is because the plotting cells further down
# select columns by these exact names.
NEI_COLUMNS = [
    "Pollutant",
    "Symbol",
    "State",
    "State_name",
    "CAV_adoption",
    "Year",
    "Concern",
    "Total Emissions",
    "Emmisions_per_100k",
    "UOM",
]

# INFO level is set explicitly: the default level (WARNING) would silently
# drop every logging.info() call below.
logging.basicConfig(filename='nei_by_pollutant.log', level=logging.INFO)

# One dict per (state, pollutant, year).  Rows are collected in a list and
# turned into a DataFrame once, instead of concatenating inside the loop.
nei_rows = []

for year in years:
    print(f'\nWorking on {year}')
    logging.info(f'\nWorking on {year}')
    state_col = state_column[year]

    for path in onroad_paths[year]:
        print(f'\nOpening {path}')
        logging.info(f'\nOpening {path}')
        onroad = pd.read_csv(path, low_memory=False)

        # Total emissions for every (state, pollutant) pair found in this file.
        state_pollutants = onroad.groupby(
            [state_col, pol_descr_column[year]], sort=False
        )[tot_em_column[year]].sum()

        # Keep the intermediate totals for inspection.  The file-name suffix is
        # the four characters that precede ".csv" in the source path.
        state_pollutants.to_csv(f'state_pollutants_{year}_{path[-8:-4]}.csv')

        states_in_file = set(onroad[state_col].unique())

        for state in top_and_bottom_CAV_states:
            state_name = abbrev_to_us_state[state]

            if state not in states_in_file:
                logging.info(f'{state_name} not in file.')
                continue
            logging.info(f"Found {state_name} in file.")

            adoption = "HIGH" if state in top_10_CAV_states else "LOW"

            for pollutant in poc:
                if (state, pollutant) not in state_pollutants:
                    logging.warning(f'({state}, {pollutant}) not found in state_pollutants.')
                    continue

                if pollutant in poc_health:
                    concern = "Health"
                elif pollutant in poc_GHG:
                    concern = "GHG"
                else:
                    # A pollutant of concern that is in neither group.
                    concern = "Error"

                total_emissions = state_pollutants[state, pollutant]

                # Population estimate for this state and year; raises IndexError
                # if the state name has no row in US_pop.
                state_pop = US_pop.loc[US_pop["NAME"] == state_name, f"POPESTIMATE{year}"]
                emissions_per_100k = 100000 * total_emissions / state_pop.iloc[0]

                # NOTE(review): the unit comes from the poc_uom table, not from
                # the file's own unit column (uom_column is never read) --
                # confirm every year's files really use these units.
                nei_rows.append({
                    "Pollutant": pollutant,
                    "Symbol": poc_symbols[pollutant],
                    "State": state,
                    "State_name": state_name,
                    "CAV_adoption": adoption,
                    "Year": int(year),
                    "Concern": concern,
                    "Total Emissions": total_emissions,
                    "Emmisions_per_100k": emissions_per_100k,
                    "UOM": poc_uom[pollutant],
                })

# Table for all states studied, with a plain 0..n-1 row index.
nei_by_pollutant = pd.DataFrame(nei_rows, columns=NEI_COLUMNS)

# Subset for the states with high CAV use.
high_CAV_nei_by_pollutant = nei_by_pollutant[
    nei_by_pollutant["State"].isin(top_10_CAV_states)
].reset_index(drop=True)

# Subset for the states with low CAV use.
low_CAV_nei_by_pollutant = nei_by_pollutant[
    nei_by_pollutant["State"].isin(bottom_10_CAV_states)
].reset_index(drop=True)

print("\nCreating nei_by_pollutant.csv")
logging.info("\nCreating nei_by_pollutant.csv")
nei_by_pollutant.to_csv('nei_by_pollutant.csv')

print("\nCreating high_CAV_nei_by_pollutant.csv")
logging.info("\nCreating high_CAV_nei_by_pollutant.csv")
high_CAV_nei_by_pollutant.to_csv('high_CAV_nei_by_pollutant.csv')

print("\nCreating low_CAV_nei_by_pollutant.csv")
logging.info("\nCreating low_CAV_nei_by_pollutant.csv")
low_CAV_nei_by_pollutant.to_csv('low_CAV_nei_by_pollutant.csv')
        


Working on 2008

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad123.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad4.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad5.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad67.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad8910.csv

Working on 2011

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_123.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_4.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_5.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_67.csv

Opening /Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_8910.csv

Working on 2014

Opening /Users/ScottMcL/Do

In [20]:
# Reload the combined table from the CSV written earlier so the plotting cells
# below can run without repeating the transform, then preview the first rows.
nei_by_pollutant = pd.read_csv(filepath_or_buffer='nei_by_pollutant.csv')
nei_by_pollutant.head(n=5)

Unnamed: 0,Pollutant,Symbol,State,State_name,CAV_adoption,Year,Concern,Total Emissions,Emmisions_per_100k,UOM
0,Carbon Dioxide,CO2,VT,Vermont,High CAV adoption,2008,GHG,3631471.0,581825.7417,Tons
1,Carbon Monoxide,CO,VT,Vermont,High CAV adoption,2008,GHG,66710.13,10688.13953,Tons
2,Chromium (VI),Cr VI,VT,Vermont,High CAV adoption,2008,Health,61.62756,9.873823,Lb's
3,Manganese,Mn,VT,Vermont,High CAV adoption,2008,Health,30.47231,4.882202,Lb's
4,Mercury,Hg,VT,Vermont,High CAV adoption,2008,Health,1.761573,0.282235,Lb's


## Health Pollutant Plots

In [30]:

# Chromium (VI): annual emissions per 100k residents, one line per state.
chromium_VI = nei_by_pollutant[nei_by_pollutant["Pollutant"].eq('Chromium (VI)')]

chromium_VI_plot = px.line(
    data_frame=chromium_VI,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels=dict([
        ("Year", ''),
        ("Emmisions_per_100k", "Lb's of annual emissions per 100k of population"),
    ]),
)

# Title, dark theme, legend title and y-range applied in one layout update.
chromium_VI_plot.update_layout(
    title='Chromium (VI) emissions per 100k of population',
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color="white",
    yaxis_range=[0, 2],
    legend={"title": "State"},
)

chromium_VI_plot.show()



In [31]:
# Manganese: annual emissions per 100k residents, one line per state.
Manganese = nei_by_pollutant[nei_by_pollutant["Pollutant"].eq('Manganese')]

Manganese_plot = px.line(
    data_frame=Manganese,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels=dict([
        ("Year", ''),
        ("Emmisions_per_100k", "Lb's of annual emissions per 100k of population"),
    ]),
)

# Title, dark theme, legend title and y-range applied in one layout update.
Manganese_plot.update_layout(
    title='Manganese emissions per 100k of population',
    plot_bgcolor='black',
    paper_bgcolor='black',
    font_color="white",
    yaxis_range=[0, 200],
    legend={"title": "State"},
)

Manganese_plot.show()

In [29]:
# Mercury: annual emissions per 100k residents, one line per state.
Mercury = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Mercury']

Mercury_plot = px.line(
    Mercury,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Lb's of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme, legend title and fixed y-range.
Mercury_plot.update_layout(
    title='Mercury emissions per 100k of population',
    yaxis_range=[0, 2],
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Mercury_plot.show()

In [34]:
# Nickel: annual emissions per 100k residents, one line per state.
Nickel = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Nickel']

Nickel_plot = px.line(
    Nickel,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Lb's of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
Nickel_plot.update_layout(
    title='Nickel emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Nickel_plot.show()

In [35]:
# Nitrogen Oxides: annual emissions per 100k residents, one line per state.
Nitrogen_Oxides = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Nitrogen Oxides']

Nitrogen_Oxides_plot = px.line(
    Nitrogen_Oxides,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
Nitrogen_Oxides_plot.update_layout(
    title='Nitrogen Oxides emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Nitrogen_Oxides_plot.show()

In [36]:
# PM10: annual emissions per 100k residents, one line per state.
PM10 = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'PM10 Primary (Filt + Cond)']

PM10_plot = px.line(
    PM10,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
PM10_plot.update_layout(
    title='Large Particulate Matter (< 10um) emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

PM10_plot.show()

In [37]:
# PM2.5: annual emissions per 100k residents, one line per state.
PM25 = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'PM2.5 Primary (Filt + Cond)']

PM25_plot = px.line(
    PM25,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
PM25_plot.update_layout(
    title='Fine Particulate Matter (< 2.5um) emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

PM25_plot.show()

In [41]:
# Sulfur Dioxide: annual emissions per 100k residents, one line per state.
Sulfur_Dioxide = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Sulfur Dioxide']

Sulfur_Dioxide_plot = px.line(
    Sulfur_Dioxide,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
Sulfur_Dioxide_plot.update_layout(
    title='Sulfur Dioxide emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Sulfur_Dioxide_plot.show()

In [42]:
# Volatile Organic Compounds: annual emissions per 100k residents, one line per state.
VOC = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Volatile Organic Compounds']

VOC_plot = px.line(
    VOC,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
VOC_plot.update_layout(
    title='Volatile Organic Compounds (VOC) emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

VOC_plot.show()

## Greenhouse Gas Pollutant Plots

In [43]:
# Carbon Dioxide: annual emissions per 100k residents, one line per state.
Carbon_Dioxide = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Carbon Dioxide']

Carbon_Dioxide_plot = px.line(
    Carbon_Dioxide,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
Carbon_Dioxide_plot.update_layout(
    title='Carbon Dioxide emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Carbon_Dioxide_plot.show()

In [44]:
# Carbon Monoxide: annual emissions per 100k residents, one line per state.
Carbon_Monoxide = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Carbon Monoxide']

Carbon_Monoxide_plot = px.line(
    Carbon_Monoxide,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
Carbon_Monoxide_plot.update_layout(
    title='Carbon Monoxide emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Carbon_Monoxide_plot.show()

In [45]:
# Methane: annual emissions per 100k residents, one line per state.
Methane = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Methane']

Methane_plot = px.line(
    Methane,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
Methane_plot.update_layout(
    title='Methane emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Methane_plot.show()

In [46]:
# Nitrous Oxide: annual emissions per 100k residents, one line per state.
Nitrous_Oxide = nei_by_pollutant.loc[nei_by_pollutant["Pollutant"] == 'Nitrous Oxide']

Nitrous_Oxide_plot = px.line(
    Nitrous_Oxide,
    x="Year",
    y="Emmisions_per_100k",
    color="State",
    hover_name="CAV_adoption",
    labels={
        "Year": '',  # blank x-axis title
        "Emmisions_per_100k": "Tons of annual emissions per 100k of population",
    },
)

# Title (spelling corrected), dark theme and legend title; the y-range is left
# to Plotly's automatic scaling.
Nitrous_Oxide_plot.update_layout(
    title='Nitrous Oxide emissions per 100k of population',
    font_color="white",
    plot_bgcolor='black',
    paper_bgcolor='black',
    legend=dict(title="State"),
)

Nitrous_Oxide_plot.show()

# Code from initial direction that was not used in final solution

In [None]:
# chunk = 10000
# onroad_2020 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2020_nei_onroad_123.csv")
# type(onroad_2020)
# onroad_2020.head(n=10000).to_csv("onroad_2020_head.csv")

## Define function to separate pollution data by state

In [None]:
# def pollution(df, df_name):
#     df['state'] = df['state'].astype("string")
#     for state in states:
#         if state in df['state'].unique():
#             print(f'Working on {state} in {df_name}')
#             state_df = df_name + "_" + str(state)
#             state_dfs[state] = df.loc[df['state'] == state]
#             filename = df_name + "_" + state + ".csv"
#             path = "/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/" + df_name[-4:] + "_by_state" + "/" + filename
#             if os.path.isfile(path):
#                 state_dfs[state].to_csv(path, mode="a", index=False, header=False)
#             else:
#                 state_dfs[state].to_csv(path, index=False)
#     print("All Done")

In [None]:
# onroad_2020 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2020_nei_onroad_123.csv")
# pollution(onroad_2020, "onroad_2020")

# onroad_2020 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2020_nei_onroad_4.csv")
# pollution(onroad_2020, "onroad_2020")

# onroad_2020 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2020_nei_onroad_5.csv")
# pollution(onroad_2020, "onroad_2020")

# onroad_2020 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2020_nei_onroad_67.csv")
# pollution(onroad_2020, "onroad_2020")

# onroad_2020 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2020_nei_onroad_8910.csv")
# pollution(onroad_2020, "onroad_2020")



In [None]:
# onroad_2020_CA = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2020_by_state/onroad_2020_CA.csv")


In [None]:
# pollutants = []

# with open("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/Pollutants.csv") as f:
#     for line in f:
#         pollutants.append(str(line))

# print(pollutants)

## Create first iteration of pollutants list

In [None]:
# pollutants = []
# print(len(pollutants))
# with open("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/onroad_2020_CA - Pollutants.csv") as f:
#     for line in f:
#         if str(line) not in pollutants:
#             print(f"Adding {str(line)}")
#             pollutants.append(str(line))

# print(len(pollutants))

# pollutants

In [None]:
# print(pollutants)

# for pollutant in pollutants:
#     pollutants[pollutants.index(pollutant)] = pollutant[:-1]

# print('*********************************')
# for item in pollutants:
#     print(item)

In [None]:
# print(len(pollutants))

# for year in years:
#     for path in onroad_paths[year]:
#         print(f'Opening {path}')
#         f = pd.read_csv(path, low_memory=False)
#         for pollutant in f[pol_descr[year]]:
#             if str(pollutant) not in pollutants:
#                 print(f"Adding {pollutant}")
#                 pollutants.append(str(pollutant))

#     print(len(pollutants))
#     print('\n*********************************\n')

# for item in pollutants:
#     print(item)
    
# print("\n",len(pollutants))


## Writing pollution list to CSV file to check for duplicates, etc.

In [None]:
# opening the csv file in 'w+' mode
# file = open('pollutants_new.csv', 'w+')
 
# writing the data into the file
# with open("pollutants_new.csv", 'w', newline='') as file:   
#     write = csv.writer(file, delimiter=',', quotechar='"')
#     write.writerows([pollutants])

In [None]:
# data = {
#     "Polutant": pollutants,
#     2008: "", 
#     2011: "", 
#     2017: "", 
#     2020: ""
# }

In [None]:
# ca_over_time = pd.DataFrame(data)
# ca_over_time

# top_11_low_em_veh_states = [
#     'CA',
#     'WA',
#     'OR',
#     'HI',
#     'VT',
#     'MA',
#     'MD',
#     'CO',
#     'NV',
#     'AZ',
#     'VA'
#     ]

In [None]:
# onroad_2020_CA.columns
# for pollutant in pollutants:
#     ca_over_time.loc[pollutant, 2020] = sum(onroad_2020_CA[onroad_2020_CA['pollutant desc'] == pollutant]['total emissions'])

# ca_over_time

In [None]:
# ca_over_time = pd.read_csv("ca_over_time.csv")
# ca_over_time

In [None]:
# onroad_2017 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2017_nei_onroad_123.csv")
# pollution(onroad_2017, "onroad_2017")

# onroad_2017 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2017_nei_onroad_4.csv")
# pollution(onroad_2017, "onroad_2017")

# onroad_2017 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2017_nei_onroad_5.csv")
# pollution(onroad_2017, "onroad_2017")

# onroad_2017 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2017_nei_onroad_67.csv")
# pollution(onroad_2017, "onroad_2017")

# onroad_2017 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2017_nei_onroad_8910.csv")
# pollution(onroad_2017, "onroad_2017")

In [None]:
# onroad_2017_CA = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2017_by_state/onroad_2017_CA.csv")
# onroad_2017_CA
# for pollutant in pollutants:
#     ca_over_time.loc[pollutant, 2017] = sum(onroad_2017_CA[onroad_2017_CA['pollutant desc'] == pollutant]['total emissions'])

# ca_over_time

In [None]:
# def pollution_old(df, df_name):
#     df['st_usps_cd'] = df['st_usps_cd'].astype("string")
#     for state in states:
#         if state in df['st_usps_cd'].unique():
#             print(f'Working on {state} in {df_name}')
#             state_df = df_name + "_" + str(state)
#             state_dfs[state] = df.loc[df['st_usps_cd'] == state]
#             filename = df_name + "_" + state + ".csv"
#             path = "/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/" + df_name[-4:] + "_by_state" + "/" + filename
#             if os.path.isfile(path):
#                 state_dfs[state].to_csv(path, mode="a", index=False, header=False)
#             else:
#                 state_dfs[state].to_csv(path, index=False)
#     print("All Done")

In [None]:
## NOTE(review): disabled cell. Reads each 2014 NEI onroad part file in turn
## (rebinding `onroad_2014` every time) and splits it by state with
## pollution_old. pollution_old appends to per-state files that already exist,
## so each part file is meant to be processed exactly once.
## The tribes file is also passed to pollution_old even though the trailing
## comment says it is keyed by tribe name rather than state -- TODO confirm
## whether that call should be dropped.
# onroad_2014 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2014_nei_onroad_123.csv")
# pollution_old(onroad_2014, "onroad_2014")

# onroad_2014 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2014_nei_onroad_4.csv")
# pollution_old(onroad_2014, "onroad_2014")

# onroad_2014 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2014_nei_onroad_5.csv")
# pollution_old(onroad_2014, "onroad_2014")

# onroad_2014 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2014_nei_onroad_67.csv")
# pollution_old(onroad_2014, "onroad_2014")

# onroad_2014 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2014_nei_onroad_8910.csv")
# pollution_old(onroad_2014, "onroad_2014")

# onroad_2014 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2014_nei_onroad_tribes.csv")
# pollution_old(onroad_2014, "onroad_2014")  # Tribal data listed by tribe name, not by state

In [None]:
## NOTE(review): disabled cell. Reads the California 2014 per-state file and,
## for each entry of `pollutants`, sums 'total_emissions' into the 2014 column
## of `ca_over_time`. The 2014 file is addressed with underscore column names
## ('pollutant_desc', 'total_emissions').
## `pollutants` and `ca_over_time` are not defined in this cell.
# onroad_2014_CA = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2014_by_state/onroad_2014_CA.csv")

# for pollutant in pollutants:
#     ca_over_time.loc[pollutant, 2014] = sum(onroad_2014_CA[onroad_2014_CA['pollutant_desc'] == pollutant]['total_emissions'])

# ca_over_time

In [None]:
## NOTE(review): disabled cell. Reads each 2011 NEI onroad part file in turn
## (rebinding `onroad_2011` every time) and splits it by state with
## pollution_old. pollution_old appends to per-state files that already exist,
## so each part file is meant to be processed exactly once.
# onroad_2011 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_123.csv")
# pollution_old(onroad_2011, "onroad_2011")

# onroad_2011 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_4.csv")
# pollution_old(onroad_2011, "onroad_2011")

# onroad_2011 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_5.csv")
# pollution_old(onroad_2011, "onroad_2011")

# onroad_2011 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_67.csv")
# pollution_old(onroad_2011, "onroad_2011")

# onroad_2011 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_nei_onroad_8910.csv")
# pollution_old(onroad_2011, "onroad_2011")

In [None]:
## NOTE(review): disabled cell. Reads the California 2011 per-state file and,
## for each entry of `pollutants`, sums 'total_emissions' into the 2011 column
## of `ca_over_time`. The 2011 file is matched on a 'description' column rather
## than the 'pollutant_desc' / 'pollutant desc' columns used for later years.
## `pollutants` and `ca_over_time` are not defined in this cell.
# onroad_2011_CA = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2011_by_state/onroad_2011_CA.csv")

# for pollutant in pollutants:
#     ca_over_time.loc[pollutant, 2011] = sum(onroad_2011_CA[onroad_2011_CA['description'] == pollutant]['total_emissions'])

# ca_over_time

In [None]:
## NOTE(review): disabled cell. Reads each 2008 NEI (v3) onroad part file in
## turn (rebinding `onroad_2008` every time) and splits it by state with
## pollution_old. pollution_old appends to per-state files that already exist,
## so each part file is meant to be processed exactly once.
# onroad_2008 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad123.csv")
# pollution_old(onroad_2008, "onroad_2008")

# onroad_2008 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad4.csv")
# pollution_old(onroad_2008, "onroad_2008")


# onroad_2008 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad5.csv")
# pollution_old(onroad_2008, "onroad_2008")

# onroad_2008 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad67.csv")
# pollution_old(onroad_2008, "onroad_2008")

# onroad_2008 = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008NEIv3_onroad8910.csv")
# pollution_old(onroad_2008, "onroad_2008")

In [None]:
## NOTE(review): disabled cell. Reads the California 2008 per-state file and,
## for each entry of `pollutants`, sums 'total_emissions' into the 2008 column
## of `ca_over_time`. As in the 2011 cell, rows are matched on 'description'.
## `pollutants` and `ca_over_time` are not defined in this cell.
# onroad_2008_CA = pd.read_csv("/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/nie_data/2008_by_state/onroad_2008_CA.csv")

# for pollutant in pollutants:
#     ca_over_time.loc[pollutant, 2008] = sum(onroad_2008_CA[onroad_2008_CA['description'] == pollutant]['total_emissions'])

# ca_over_time

In [None]:
# ca_over_time.to_csv("ca_over_time.csv")

In [None]:
# ca_over_time = pd.read_csv("ca_over_time.csv", index_col = "Pollutant")
# ca_over_time

# DataFrames for Plots

In [None]:
## NOTE(review): disabled cell. For each state in the list it builds a
## pollutant-by-year table (rows = entries of `poc`, columns = years) by
## summing total emissions per pollutant from that state's per-year CSV, then
## writes it to "onroad_<state>_over_time.csv" in the working directory.
## - Column names differ by inventory year (see the branches); the 2008/2011
##   branches and the 2017/2020 branches are otherwise identical.
## - The path built here has no "nie_data/" segment, unlike the other
##   per-state paths in this notebook
##   (".../Capstone_2/nie_data/<year>_by_state/...") -- TODO confirm the
##   directory before re-enabling.
## - `years`, `poc` and `bottom_10_low_em_veh_states` are not defined in this cell.
# for state in bottom_10_low_em_veh_states:
#     df = pd.DataFrame()
#     for year in years:
#         filename = f"onroad_{year}_{state}.csv"
#         path = f"/Users/ScottMcL/Documents/Coding_Temple/Capstone_2/{year}_by_state/{filename}"
#         state_pollution = pd.read_csv(path, low_memory=False)
#         print(f"Working on {state} for {year}")
#         for pollutant in poc:
#             if year == 2008:
#                 df.loc[pollutant, 2008] = sum(state_pollution[state_pollution['description'] == pollutant]['total_emissions'])
#             elif year == 2011:
#                 df.loc[pollutant, 2011] = sum(state_pollution[state_pollution['description'] == pollutant]['total_emissions'])
#             elif year == 2014:
#                 df.loc[pollutant, 2014] = sum(state_pollution[state_pollution['pollutant_desc'] == pollutant]['total_emissions'])
#             elif year == 2017:
#                 df.loc[pollutant, 2017] = sum(state_pollution[state_pollution['pollutant desc'] == pollutant]['total emissions'])
#             elif year == 2020:
#                 df.loc[pollutant, 2020] = sum(state_pollution[state_pollution['pollutant desc'] == pollutant]['total emissions'])
            
#     csv_filename = f"onroad_{state}_over_time.csv"
#     df.to_csv(csv_filename)

In [None]:
## NOTE(review): disabled cell. Loads the per-state "over time" tables, using
## the first CSV column as the index, into one variable per state: eleven
## states first, then the ten listed in `bottom_10_low_em_veh_states`.
## Files are addressed by bare filename, i.e. relative to the working directory.
# onroad_CA_over_time = pd.read_csv("onroad_CA_over_time.csv", index_col = 0)
# onroad_WA_over_time = pd.read_csv("onroad_WA_over_time.csv", index_col = 0)
# onroad_OR_over_time = pd.read_csv("onroad_OR_over_time.csv", index_col = 0)
# onroad_HI_over_time = pd.read_csv("onroad_HI_over_time.csv", index_col = 0)
# onroad_VT_over_time = pd.read_csv("onroad_VT_over_time.csv", index_col = 0)
# onroad_MA_over_time = pd.read_csv("onroad_MA_over_time.csv", index_col = 0)
# onroad_MD_over_time = pd.read_csv("onroad_MD_over_time.csv", index_col = 0)
# onroad_CO_over_time = pd.read_csv("onroad_CO_over_time.csv", index_col = 0)
# onroad_NV_over_time = pd.read_csv("onroad_NV_over_time.csv", index_col = 0)
# onroad_AZ_over_time = pd.read_csv("onroad_AZ_over_time.csv", index_col = 0)
# onroad_VA_over_time = pd.read_csv("onroad_VA_over_time.csv", index_col = 0)

# bottom_10_low_em_veh_states = ['MI', 'ND','LA','WY','AL','SD','WV','AR','OK','KY']
# onroad_MI_over_time = pd.read_csv("onroad_MI_over_time.csv", index_col = 0)
# onroad_ND_over_time = pd.read_csv("onroad_ND_over_time.csv", index_col = 0)
# onroad_LA_over_time = pd.read_csv("onroad_LA_over_time.csv", index_col = 0)
# onroad_WY_over_time = pd.read_csv("onroad_WY_over_time.csv", index_col = 0)
# onroad_AL_over_time = pd.read_csv("onroad_AL_over_time.csv", index_col = 0)
# onroad_SD_over_time = pd.read_csv("onroad_SD_over_time.csv", index_col = 0)
# onroad_WV_over_time = pd.read_csv("onroad_WV_over_time.csv", index_col = 0)
# onroad_AR_over_time = pd.read_csv("onroad_AR_over_time.csv", index_col = 0)
# onroad_OK_over_time = pd.read_csv("onroad_OK_over_time.csv", index_col = 0)
# onroad_KY_over_time = pd.read_csv("onroad_KY_over_time.csv", index_col = 0)

In [None]:
# onroad_MI_over_time

In [None]:

## NOTE(review): disabled cell. For every loaded state table, keeps only the
## rows whose index label is in `poc_health` and binds the result to
## health_<state>. `poc_health` is not defined in this cell.
# health_CA = onroad_CA_over_time[onroad_CA_over_time.index.isin(poc_health)]
# health_WA = onroad_WA_over_time[onroad_WA_over_time.index.isin(poc_health)]
# health_OR = onroad_OR_over_time[onroad_OR_over_time.index.isin(poc_health)]
# health_HI = onroad_HI_over_time[onroad_HI_over_time.index.isin(poc_health)]
# health_VT = onroad_VT_over_time[onroad_VT_over_time.index.isin(poc_health)]
# health_MA = onroad_MA_over_time[onroad_MA_over_time.index.isin(poc_health)]
# health_MD = onroad_MD_over_time[onroad_MD_over_time.index.isin(poc_health)]
# health_CO = onroad_CO_over_time[onroad_CO_over_time.index.isin(poc_health)]
# health_NV = onroad_NV_over_time[onroad_NV_over_time.index.isin(poc_health)]
# health_AZ = onroad_AZ_over_time[onroad_AZ_over_time.index.isin(poc_health)]
# health_VA = onroad_VA_over_time[onroad_VA_over_time.index.isin(poc_health)]

# bottom_10_low_em_veh_states = ['MI', 'ND','LA','WY','AL','SD','WV','AR','OK','KY']
# health_MI = onroad_MI_over_time[onroad_MI_over_time.index.isin(poc_health)]
# health_ND = onroad_ND_over_time[onroad_ND_over_time.index.isin(poc_health)]
# health_LA = onroad_LA_over_time[onroad_LA_over_time.index.isin(poc_health)]
# health_WY = onroad_WY_over_time[onroad_WY_over_time.index.isin(poc_health)]
# health_AL = onroad_AL_over_time[onroad_AL_over_time.index.isin(poc_health)]
# health_SD = onroad_SD_over_time[onroad_SD_over_time.index.isin(poc_health)]
# health_WV = onroad_WV_over_time[onroad_WV_over_time.index.isin(poc_health)]
# health_AR = onroad_AR_over_time[onroad_AR_over_time.index.isin(poc_health)]
# health_OK = onroad_OK_over_time[onroad_OK_over_time.index.isin(poc_health)]
# health_KY = onroad_KY_over_time[onroad_KY_over_time.index.isin(poc_health)]

In [None]:
## NOTE(review): disabled cell. For every health_<state> frame, moves the
## index into a regular column and renames that column from 'index' to
## 'Pollutant' (in place).
## The cell rebinds the same names it reads, so it is not idempotent:
## presumably a second run would insert a fresh 'index' column holding the
## row numbers and rename it to a second 'Pollutant' column -- TODO confirm
## before re-enabling.
# health_CA = health_CA.reset_index()
# health_WA = health_WA.reset_index()
# health_OR = health_OR.reset_index()
# health_HI = health_HI.reset_index()
# health_VT = health_VT.reset_index()
# health_MA = health_MA.reset_index()
# health_MD = health_MD.reset_index()
# health_CO = health_CO.reset_index()
# health_NV = health_NV.reset_index()
# health_AZ = health_AZ.reset_index()
# health_VA = health_VA.reset_index()

# health_CA.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_WA.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_OR.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_HI.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_VT.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_MA.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_MD.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_CO.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_NV.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_AZ.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_VA.rename(columns = {'index':'Pollutant'}, inplace = True)

# health_MI = health_MI.reset_index()
# health_ND = health_ND.reset_index()
# health_LA = health_LA.reset_index()
# health_WY = health_WY.reset_index()
# health_AL = health_AL.reset_index()
# health_SD = health_SD.reset_index()
# health_WV = health_WV.reset_index()
# health_AR = health_AR.reset_index()
# health_OK = health_OK.reset_index()
# health_KY = health_KY.reset_index()

# health_MI.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_ND.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_LA.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_WY.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_AL.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_SD.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_WV.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_AR.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_OK.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_KY.rename(columns = {'index':'Pollutant'}, inplace = True)

In [None]:
# health_MI

In [None]:
## NOTE(review): disabled cell. Greenhouse-gas counterpart of the health_*
## cells, done in one cell per group of states: keep the rows whose index
## label is in `poc_GHG`, move the index into a column, and rename that column
## from 'index' to 'Pollutant' (in place). `poc_GHG` is not defined in this cell.
# GHG_CA = onroad_CA_over_time[onroad_CA_over_time.index.isin(poc_GHG)]
# GHG_WA = onroad_WA_over_time[onroad_WA_over_time.index.isin(poc_GHG)]
# GHG_OR = onroad_OR_over_time[onroad_OR_over_time.index.isin(poc_GHG)]
# GHG_HI = onroad_HI_over_time[onroad_HI_over_time.index.isin(poc_GHG)]
# GHG_VT = onroad_VT_over_time[onroad_VT_over_time.index.isin(poc_GHG)]
# GHG_MA = onroad_MA_over_time[onroad_MA_over_time.index.isin(poc_GHG)]
# GHG_MD = onroad_MD_over_time[onroad_MD_over_time.index.isin(poc_GHG)]
# GHG_CO = onroad_CO_over_time[onroad_CO_over_time.index.isin(poc_GHG)]
# GHG_NV = onroad_NV_over_time[onroad_NV_over_time.index.isin(poc_GHG)]
# GHG_AZ = onroad_AZ_over_time[onroad_AZ_over_time.index.isin(poc_GHG)]
# GHG_VA = onroad_VA_over_time[onroad_VA_over_time.index.isin(poc_GHG)]

# GHG_CA = GHG_CA.reset_index()
# GHG_WA = GHG_WA.reset_index()
# GHG_OR = GHG_OR.reset_index()
# GHG_HI = GHG_HI.reset_index()
# GHG_VT = GHG_VT.reset_index()
# GHG_MA = GHG_MA.reset_index()
# GHG_MD = GHG_MD.reset_index()
# GHG_CO = GHG_CO.reset_index()
# GHG_NV = GHG_NV.reset_index()
# GHG_AZ = GHG_AZ.reset_index()
# GHG_VA = GHG_VA.reset_index()

# GHG_CA.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_WA.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_OR.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_HI.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_VT.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_MA.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_MD.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_CO.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_NV.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_AZ.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_VA.rename(columns = {'index':'Pollutant'}, inplace = True)

# bottom_10_low_em_veh_states = ['MI', 'ND','LA','WY','AL','SD','WV','AR','OK','KY']
# GHG_MI = onroad_MI_over_time[onroad_MI_over_time.index.isin(poc_GHG)]
# GHG_ND = onroad_ND_over_time[onroad_ND_over_time.index.isin(poc_GHG)]
# GHG_LA = onroad_LA_over_time[onroad_LA_over_time.index.isin(poc_GHG)]
# GHG_WY = onroad_WY_over_time[onroad_WY_over_time.index.isin(poc_GHG)]
# GHG_AL = onroad_AL_over_time[onroad_AL_over_time.index.isin(poc_GHG)]
# GHG_SD = onroad_SD_over_time[onroad_SD_over_time.index.isin(poc_GHG)]
# GHG_WV = onroad_WV_over_time[onroad_WV_over_time.index.isin(poc_GHG)]
# GHG_AR = onroad_AR_over_time[onroad_AR_over_time.index.isin(poc_GHG)]
# GHG_OK = onroad_OK_over_time[onroad_OK_over_time.index.isin(poc_GHG)]
# GHG_KY = onroad_KY_over_time[onroad_KY_over_time.index.isin(poc_GHG)]

# GHG_MI = GHG_MI.reset_index()
# GHG_ND = GHG_ND.reset_index()
# GHG_LA = GHG_LA.reset_index()
# GHG_WY = GHG_WY.reset_index()
# GHG_AL = GHG_AL.reset_index()
# GHG_SD = GHG_SD.reset_index()
# GHG_WV = GHG_WV.reset_index()
# GHG_AR = GHG_AR.reset_index()
# GHG_OK = GHG_OK.reset_index()
# GHG_KY = GHG_KY.reset_index()

# GHG_MI.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_ND.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_LA.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_WY.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_AL.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_SD.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_WV.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_AR.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_OK.rename(columns = {'index':'Pollutant'}, inplace = True)
# GHG_KY.rename(columns = {'index':'Pollutant'}, inplace = True)

In [None]:
## NOTE(review): disabled scratch cell. The first two lines repeat the
## reset_index/rename that the earlier health_* cell already applies to
## `health_CA`; the last two lines are bare names, of which only the final one
## (`GHG_OK`) would be displayed as the cell output.
# health_CA = health_CA.reset_index()
# health_CA.rename(columns = {'index':'Pollutant'}, inplace = True)
# health_KY
# GHG_OK

# Export to CSV to plot in Excel
##### DataFrames are not set up for Plotly

In [None]:
## NOTE(review): disabled plotting attempt with plotly express.
## - The figure is assigned to `CA_GHG_plot`, but `.show()` is called on
##   `CA_health_plot`, which is not defined in this cell.
## - The frame passed is `health_CA` while `y` is `poc_GHG`; presumably
##   `GHG_CA` was intended -- TODO confirm.
## - `x` is the literal list of years rather than a column of the frame.
# years = [2008, 2011, 2014, 2017, 2020]
# CA_GHG_plot = px.line(
#     health_CA,
#     x = years,
#     y = poc_GHG,
# #     color = "Pollutant"
# )

# # CA_GHG_plot.update_layout(linemode = 'group')

# CA_health_plot.show()

In [None]:
## NOTE(review): disabled cell. Writes every health_<state> and GHG_<state>
## frame to "<name>.csv" (bare filename, so relative to the working directory)
## without the index column: the first eleven states, then the ten
## bottom-10 states.
# health_CA.to_csv("health_CA.csv", index = False)
# health_WA.to_csv("health_WA.csv", index = False)
# health_OR.to_csv("health_OR.csv", index = False)
# health_HI.to_csv("health_HI.csv", index = False)
# health_VT.to_csv("health_VT.csv", index = False)
# health_MA.to_csv("health_MA.csv", index = False)
# health_MD.to_csv("health_MD.csv", index = False)
# health_CO.to_csv("health_CO.csv", index = False)
# health_NV.to_csv("health_NV.csv", index = False)
# health_AZ.to_csv("health_AZ.csv", index = False)
# health_VA.to_csv("health_VA.csv", index = False)
# GHG_CA.to_csv("GHG_CA.csv", index = False)
# GHG_WA.to_csv("GHG_WA.csv", index = False)
# GHG_OR.to_csv("GHG_OR.csv", index = False)
# GHG_HI.to_csv("GHG_HI.csv", index = False)
# GHG_VT.to_csv("GHG_VT.csv", index = False)
# GHG_MA.to_csv("GHG_MA.csv", index = False)
# GHG_MD.to_csv("GHG_MD.csv", index = False)
# GHG_CO.to_csv("GHG_CO.csv", index = False)
# GHG_NV.to_csv("GHG_NV.csv", index = False)
# GHG_AZ.to_csv("GHG_AZ.csv", index = False)
# GHG_VA.to_csv("GHG_VA.csv", index = False)

# health_MI.to_csv("health_MI.csv", index = False)
# health_ND.to_csv("health_ND.csv", index = False)
# health_LA.to_csv("health_LA.csv", index = False)
# health_WY.to_csv("health_WY.csv", index = False)
# health_AL.to_csv("health_AL.csv", index = False)
# health_SD.to_csv("health_SD.csv", index = False)
# health_WV.to_csv("health_WV.csv", index = False)
# health_AR.to_csv("health_AR.csv", index = False)
# health_OK.to_csv("health_OK.csv", index = False)
# health_KY.to_csv("health_KY.csv", index = False)
# GHG_MI.to_csv("GHG_MI.csv", index = False)
# GHG_ND.to_csv("GHG_ND.csv", index = False)
# GHG_LA.to_csv("GHG_LA.csv", index = False)
# GHG_WY.to_csv("GHG_WY.csv", index = False)
# GHG_AL.to_csv("GHG_AL.csv", index = False)
# GHG_SD.to_csv("GHG_SD.csv", index = False)
# GHG_WV.to_csv("GHG_WV.csv", index = False)
# GHG_AR.to_csv("GHG_AR.csv", index = False)
# GHG_OK.to_csv("GHG_OK.csv", index = False)
# GHG_KY.to_csv("GHG_KY.csv", index = False)

In [None]:
# nei_by_pollutant = pd.DataFrame({
#     "Pollutant":pd.Series(dtype='str'),
#     "Symbol": pd.Series(dtype='str'),
#     "State":pd.Series(dtype='str'),
#     "CAV_adoption":pd.Series(dtype='str'),
#     "Year":pd.Series(dtype='int'),
#     "Concern":pd.Series(dtype='str'),
#     "Total Emissions":pd.Series(dtype='float'),
#     "UOM":pd.Series(dtype='str')
# }, index = ["Pollutant"])

# nei_by_pollutant.reset_index()
# print(type(nei_by_pollutant))