In [1]:
# Importing all the necessary libaries that will be used through the project

import pandas as pd
import plotly_express as px
import plotly.graph_objects as go
import seaborn as sns


In [3]:
# Read in the 1980 and 2021 data to compare air quality in lockdown and before lockdown

oldAir_df = pd.read_csv("daily_aqi_by_county_1980.csv")
newAir_df = pd.read_csv("daily_aqi_by_county_2021.csv")

In [5]:
# Keeping only the necessary columns, as well as identifers (Date)

oldAir_df = oldAir_df[["State Name", "county Name", "Date", "AQI", "Category"]]
newAir_df = newAir_df[["State Name", "county Name", "Date", "AQI", "Category"]]

# How many counties/states are in the data. To see what would be feasible to display

new_states = newAir_df['State Name'].unique()
print(len(new_states))

# To see the data
result_old = oldAir_df[(oldAir_df['State Name'] == "Alabama") & (oldAir_df['county Name'] == "Tuscaloosa")]
result_new = newAir_df[(newAir_df['State Name'] == "Alabama") & (newAir_df['county Name'] == "Tuscaloosa")]
print(result_old)
#print(result_new)


54
     State Name county Name        Date  AQI  Category
2488    Alabama  Tuscaloosa  1980-05-20   40      Good
2489    Alabama  Tuscaloosa  1980-05-21   74  Moderate
2490    Alabama  Tuscaloosa  1980-05-22   42      Good
2491    Alabama  Tuscaloosa  1980-05-23   67  Moderate
2492    Alabama  Tuscaloosa  1980-05-24   77  Moderate
...         ...         ...         ...  ...       ...
2615    Alabama  Tuscaloosa  1980-12-27   28      Good
2616    Alabama  Tuscaloosa  1980-12-28   32      Good
2617    Alabama  Tuscaloosa  1980-12-29   28      Good
2618    Alabama  Tuscaloosa  1980-12-30   14      Good
2619    Alabama  Tuscaloosa  1980-12-31   31      Good

[132 rows x 5 columns]


In [6]:
# To display the average AQI per state in old and new data

old_average = oldAir_df.groupby('State Name')['AQI'].mean().reset_index()
new_average = newAir_df.groupby('State Name')['AQI'].mean().reset_index()

# Match with every state to map to a heatmap, link for Kaggle data set in the report
state_info = pd.read_csv("US_GeoCode.csv")
#print(state_info)


# Adding Latitudes and Longitudes to all the dataframes
old_average['Latitude'] = None
old_average['Longitude'] = None

new_average['Latitude'] = None
new_average['Longitude'] = None


# Loop through the dataframes and assign values to latitude and longitude
for index, data in old_average.iterrows():
    name = data['State Name']
    state_loc = state_info.loc[state_info['Name'] == name]
    
    if len(state_loc['latitude'].values) != 0:
            old_average.at[index, 'Latitude'] = state_loc['latitude'].values[0]
            old_average.at[index, 'Longitude'] = state_loc['longitude'].values[0]

for index, data in new_average.iterrows():
    name = data['State Name']
    state_loc = state_info.loc[state_info['Name'] == name]
    
    if len(state_loc['latitude'].values) != 0:
            new_average.at[index, 'Latitude'] = state_loc['latitude'].values[0]
            new_average.at[index, 'Longitude'] = state_loc['longitude'].values[0]
        
# Check the dataframes
print(old_average)
print(new_average)

              State Name        AQI   Latitude   Longitude
0                Alabama  65.240076  32.318231  -86.902298
1                 Alaska  45.604414  63.588753 -154.493062
2                Arizona  93.704272  34.048928 -111.093731
3               Arkansas  41.818182   35.20105  -91.831833
4             California  68.606929  36.778261 -119.417932
5               Colorado  49.718805  39.550051 -105.782067
6            Connecticut  89.888818  41.603221  -73.087749
7               Delaware  53.458580  38.910832   -75.52767
8   District Of Columbia  79.226776       None        None
9                Florida  45.447225  27.664827  -81.515754
10               Georgia  35.517600  32.157435  -82.907123
11                Hawaii  16.803324  19.898682 -155.665857
12                 Idaho  90.588599  44.068202 -114.742041
13              Illinois  58.990562  40.633125  -89.398528
14               Indiana  66.820131  40.551217  -85.602364
15                  Iowa  42.738400  41.878003  -93.0977

In [7]:
# Constructing heatmaps for 1980 and 2021, displaying each state's average AQI with a scatterpoint on the colorscale
# Using Plotly Express

fig = px.density_mapbox(
        old_average,
        lat='Latitude',
        lon='Longitude',
        z='AQI',
        opacity=0,
        color_continuous_scale='icefire',
        radius=40,
        zoom=2,
        mapbox_style='open-street-map',
        height=600
    )

fig.add_trace(
        go.Scattermapbox(
            lat=old_average["Latitude"],
            lon=old_average["Longitude"],
            mode="markers",
            showlegend=False,
            hoverinfo="skip",
            marker={
                "color": old_average["AQI"],
                "size": old_average["AQI"].fillna(0),
                "coloraxis": "coloraxis",
                "sizemode": "area",
            },
        )
    )

fig_two = px.density_mapbox(
        new_average,
        lat='Latitude',
        lon='Longitude',
        z='AQI',
        opacity=0,
        color_continuous_scale='icefire',
        radius=40,
        zoom=2,
        mapbox_style='open-street-map',
        height=600
    )

fig_two.add_trace(
        go.Scattermapbox(
            lat=new_average["Latitude"],
            lon=new_average["Longitude"],
            mode="markers",
            showlegend=False,
            hoverinfo="skip",
            marker={
                "color": new_average["AQI"],
                "size": new_average["AQI"].fillna(0),
                "coloraxis": "coloraxis",
                "sizemode": "area",
            },
        )
    )

fig.update_layout(title_text='1980 AQI Heatmap')
fig_two.update_layout(title_text='2021 (LOCKDOWN) AQI Heatmap')

fig.show()
fig_two.show()