In [13]:
import requests
import pandas as pd 
import json
import calendar
import hvplot.pandas
import folium
from folium.plugins import HeatMap
from geopy.exc import GeocoderTimedOut
from geopy.geocoders import Nominatim
from bokeh.models.formatters import DatetimeTickFormatter
# Tick formatter handed to the hvplot calls below via `xformatter`;
# month-level ticks use the strftime pattern '%Y %b'.
formatter = DatetimeTickFormatter(months='%Y %b')
# Module-level coordinate accumulators, appended to by the geocoding loop.
longitude, latitude = [], []
def findGeocode(state, max_retries=3):
    """Geocode a place name with Nominatim, retrying a bounded number of times.

    Parameters
    ----------
    state : str
        Query string passed to ``geolocator.geocode``.
    max_retries : int, optional
        Number of extra attempts made after a ``GeocoderTimedOut``
        (default 3). Negative values are treated as 0.

    Returns
    -------
    The value returned by ``geolocator.geocode(state)``, or ``None`` when
    every attempt timed out.
    """
    # Specify the user_agent once; the same geolocator is reused for every retry.
    geolocator = Nominatim(user_agent="group_1")
    # A bounded loop replaces the previous unbounded recursion, which would end
    # in a RecursionError if the service kept timing out.
    for _ in range(max(0, max_retries) + 1):
        try:
            return geolocator.geocode(state)
        except GeocoderTimedOut:
            continue
    return None

# Unemployment Data
To get the unemployment data, we used the Bureau of Labor Statistics (BLS) Public Data Application Programming Interface (API), which gives public access to economic data from all BLS programs. We then "pickled" the resulting dataframe and "unpickle" it here:

In [2]:
# Load the pickled unemployment table, order the rows by state, and make sure
# the rate column is numeric.
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
all_unemployment_data = (
    pd.read_pickle("personal-Akanksha/pickled_unemploy_rate.pkl")
    .sort_values('State')
    .astype({'Unemployment Rate': float})
)
all_unemployment_data

Unnamed: 0,State,Year,Month,Unemployment Rate
4501,Alabama,2021,November,2.6
4515,Alabama,2020,September,6.0
4514,Alabama,2020,October,4.9
4513,Alabama,2020,November,4.4
4512,Alabama,2020,December,4.1
...,...,...,...,...
4466,Wyoming,2021,October,3.5
4467,Wyoming,2021,September,3.4
4468,Wyoming,2021,August,3.9
4470,Wyoming,2021,June,5.0


## Split the data by year

### 2019 Data:

In [3]:
# Keep only the 2019 rows ('Year' is compared and concatenated as a string).
unemployment_2019 = all_unemployment_data.loc[all_unemployment_data['Year'] == '2019'].copy()
# Build a datetime column from "<year> <month name>". The explicit format avoids
# relying on pandas' format inference; it assumes full English month names.
unemployment_2019['Year Month'] = pd.to_datetime(
    unemployment_2019['Year'] + ' ' + unemployment_2019['Month'],
    format='%Y %B',
)
unemployment_2019 = unemployment_2019[['Year Month', 'State', 'Unemployment Rate']]
# The former `set_index('Year Month')` call was dropped: its result was never
# assigned, so it had no effect, and the plot reads 'Year Month' as a column.
plot_2019 = unemployment_2019.hvplot(y='Unemployment Rate', x = 'Year Month',groupby='State', rot = 90, label='Unemployment Rates-2019', ylim = (0,10), xformatter = formatter)

### 2020 Data:

In [4]:
# Keep only the 2020 rows ('Year' is compared and concatenated as a string).
unemployment_2020 = all_unemployment_data.loc[all_unemployment_data['Year'] == '2020'].copy()
# Build a datetime column from "<year> <month name>". The explicit format avoids
# relying on pandas' format inference; it assumes full English month names.
unemployment_2020['Year Month'] = pd.to_datetime(
    unemployment_2020['Year'] + ' ' + unemployment_2020['Month'],
    format='%Y %B',
)
unemployment_2020 = unemployment_2020[['Year Month', 'State', 'Unemployment Rate']]
# The former `set_index('Year Month')` call was dropped: its result was never
# assigned, so it had no effect, and the plot reads 'Year Month' as a column.
plot_2020 = unemployment_2020.hvplot(y='Unemployment Rate', x = 'Year Month',groupby='State', rot = 90, label='Unemployment Rates-2020', ylim = (0,10), xformatter = formatter)

### 2021 Data:

In [5]:
# Keep only the 2021 rows ('Year' is compared and concatenated as a string).
unemployment_2021 = all_unemployment_data.loc[all_unemployment_data['Year'] == '2021'].copy()
# Build a datetime column from "<year> <month name>". The explicit format avoids
# relying on pandas' format inference; it assumes full English month names.
unemployment_2021['Year Month'] = pd.to_datetime(
    unemployment_2021['Year'] + ' ' + unemployment_2021['Month'],
    format='%Y %B',
)
unemployment_2021 = unemployment_2021[['Year Month', 'State', 'Unemployment Rate']]
# The former `set_index('Year Month')` call was dropped: its result was never
# assigned, so it had no effect, and the plot reads 'Year Month' as a column.
plot_2021 = unemployment_2021.hvplot(y='Unemployment Rate', x = 'Year Month',groupby='State', rot = 90, label='Unemployment Rates-2021', ylim = (0,10), xformatter = formatter)

## Plot all Unemployment Data

In [6]:
# Overlay the three yearly plots with `*`, then apply shared display options;
# the `ylim` passed here differs from the (0, 10) used on the individual plots.
plot_all_year_unemployment = plot_2019 * plot_2020 * plot_2021
plot_all_year_unemployment.opts(
    ylim=(1, 20),
    frame_height=200,
    legend_position='bottom',
    title='Unemployment Rates 2019-2021 by State',
)

# Location Data
We used geopy to look up the latitude and longitude of each state.

In [7]:
# Distinct state names found in the unemployment table.
states = pd.unique(all_unemployment_data['State'])
states

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
       'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
       'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
       'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
       'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype=object)

In [8]:
# Geocode every state exactly once and collect one latitude/longitude pair per
# state, so the coordinate lists always line up with `states`.
# The lists are re-created here so that re-running this cell starts clean.
latitude = []
longitude = []
for state_name in states:
    loc = findGeocode(state_name)  # single lookup per state (was called twice)
    if loc is None:
        # NaN instead of '' keeps the columns numeric, so `.mean()` still works.
        latitude.append(float('nan'))
        longitude.append(float('nan'))
    else:
        # Always append: skipping values already in the list would shift every
        # later state onto the wrong coordinates.
        latitude.append(loc.latitude)
        longitude.append(loc.longitude)
# NOTE(review): bare names such as "Georgia" or "Washington" may resolve to a
# non-state place in Nominatim — confirm the returned coordinates.

df_location = pd.DataFrame({
    'State': states,
    'Latitude': latitude,
    'Longitude': longitude,
})
df_location.head()

Unnamed: 0,State,Latitude,Longitude
0,Alabama,33.258882,-86.829534
1,Alaska,64.445961,-149.680909
2,Arizona,34.395342,-111.763275
3,Arkansas,35.204888,-92.447911
4,California,36.701463,-118.755997


# Combine Unemployment Rates with Location
We then grouped the data by state and computed the average unemployment rate for each year. Next, we combined those averages with the location coordinates and plotted them.

## 2019:

In [9]:
# Average the 2019 unemployment rate per state.
# The rate column is selected explicitly so the datetime 'Year Month' column is
# never averaged, whatever the pandas version does with non-numeric columns.
unemploy_by_state_2019 = (
    unemployment_2019
    .groupby('State')['Unemployment Rate']
    .mean()
    .reset_index()
)
unemploy_by_state_2019.head()

Unnamed: 0,State,Unemployment Rate
0,Alabama,3.175
1,Alaska,5.475
2,Arizona,4.841667
3,Arkansas,3.466667
4,California,4.108333


In [10]:
# Attach each state's coordinates to its average rate by joining on the
# 'State' key. The previous positional concat relied on both frames sharing
# the same row order, and its transpose round-trip turned every column into
# object dtype.
coordinates = (
    unemploy_by_state_2019[['State', 'Unemployment Rate']]
    .merge(df_location, on='State', how='inner')
    .astype({'Unemployment Rate': float})
)
coordinates.head()

Unnamed: 0,State,Unemployment Rate,Latitude,Longitude
0,Alabama,3.175,33.258882,-86.829534
1,Alaska,5.475,64.445961,-149.680909
2,Arizona,4.841667,34.395342,-111.763275
3,Arkansas,3.466667,35.204888,-92.447911
4,California,4.108333,36.701463,-118.755997


In [11]:
# Geographic scatter of the states on OSM tiles: marker size and colour both
# come from the 'Unemployment Rate' column, and hovering shows the state name
# and its rate.
coordinates.hvplot.points(
    x='Longitude', y='Latitude',
    xlabel='Longitude', ylabel='Latitude',
    label='2019 Unemployment',
    geo=True, tiles='OSM',
    frame_width=700, frame_height=500,
    color='Unemployment Rate',
    size='Unemployment Rate',
    scale=7,
    hover_cols=['State', 'Unemployment Rate'],
)

In [18]:
# Centre the map on the mean of the geocoded state coordinates. Values are
# coerced to numeric so a non-numeric placeholder for a failed lookup becomes
# NaN and is skipped by `.mean()`.
meanLat = pd.to_numeric(df_location['Latitude'], errors='coerce').mean()
meanLong = pd.to_numeric(df_location['Longitude'], errors='coerce').mean()
# zoom_start was 14.5 (street level), which left the state-level heat points
# off screen; 4 shows roughly a country-sized area.
mapObj = folium.Map(location=[meanLat, meanLong], zoom_start=4)
# create heatmap layer from [lat, lon, weight] rows; rows with missing values
# are dropped (HeatMap is expected to reject NaN input).
heat_points = (
    coordinates[['Latitude', 'Longitude', 'Unemployment Rate']]
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
    .values.tolist()
)
heatmap = HeatMap(heat_points, min_opacity=0.2, radius=50, blur=50, max_zoom=1)
# add heatmap layer to base map
heatmap.add_to(mapObj)
mapObj
