In [1]:
#!pip install altair==5.0.0rc1
#!pip install folium

In [2]:
import folium
import pandas as pd
import numpy as np

In [3]:
import altair as alt
from vega_datasets import data

counties = alt.topo_feature(data.us_10m.url, 'counties')
source = data.unemployment.url
source

'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/unemployment.tsv'

In [4]:
df = pd.read_csv('https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/unemployment.tsv', sep='\t')
df

Unnamed: 0,id,rate
0,1001,0.097
1,1003,0.091
2,1005,0.134
3,1007,0.121
4,1009,0.099
...,...,...
3213,72145,0.176
3214,72147,0.277
3215,72149,0.198
3216,72151,0.241


In [5]:
data.us_10m.url

'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/us-10m.json'

In [6]:
#import requests

#url = 'https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/us-10m.json'
#response = requests.get(url)

#with open('us-10m.json', 'wb') as file:
    #file.write(response.content)

In [7]:
import json

with open("us-10m.json", "r") as file:
    topojson_data = json.load(file)

for feature in topojson_data["objects"]["states"]["geometries"]:
    print(f"FIPS: {feature['id']}, Type: {type(feature['id'])}")

FIPS: 2, Type: <class 'int'>
FIPS: 15, Type: <class 'int'>
FIPS: 72, Type: <class 'int'>
FIPS: 1, Type: <class 'int'>
FIPS: 5, Type: <class 'int'>
FIPS: 4, Type: <class 'int'>
FIPS: 6, Type: <class 'int'>
FIPS: 8, Type: <class 'int'>
FIPS: 9, Type: <class 'int'>
FIPS: 11, Type: <class 'int'>
FIPS: 10, Type: <class 'int'>
FIPS: 12, Type: <class 'int'>
FIPS: 13, Type: <class 'int'>
FIPS: 19, Type: <class 'int'>
FIPS: 16, Type: <class 'int'>
FIPS: 17, Type: <class 'int'>
FIPS: 18, Type: <class 'int'>
FIPS: 20, Type: <class 'int'>
FIPS: 21, Type: <class 'int'>
FIPS: 22, Type: <class 'int'>
FIPS: 25, Type: <class 'int'>
FIPS: 24, Type: <class 'int'>
FIPS: 23, Type: <class 'int'>
FIPS: 26, Type: <class 'int'>
FIPS: 27, Type: <class 'int'>
FIPS: 29, Type: <class 'int'>
FIPS: 28, Type: <class 'int'>
FIPS: 30, Type: <class 'int'>
FIPS: 37, Type: <class 'int'>
FIPS: 38, Type: <class 'int'>
FIPS: 31, Type: <class 'int'>
FIPS: 33, Type: <class 'int'>
FIPS: 34, Type: <class 'int'>
FIPS: 35, Type: <

In [8]:
chart = alt.Chart(counties).mark_geoshape().encode(
    color='rate:Q',
    tooltip=['rate:Q', 'id:N']
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(source, 'id', ['rate'])
).project(
    type='albersUsa'
).properties(
    width=500,
    height=300
)

chart

In [9]:
#Data Sources of Unemployment
#https://www.bls.gov/web/laus/ststdnsadata.zip
#https://www.bls.gov/web/laus/ststdsadata.zip
#http://www.bls.gov/lau/staadata.zip

In [10]:
state_year = pd.read_excel("staadata.xlsx", sheet_name = 0, header = 7, usecols = "A:J")
state_year.columns = ["fips", "state", "year", "pop", "clf", "pc_clf", "emp", "pc_emp", "unem", "unem_rate"]
print(state_year)

      fips          state  year       pop      clf  pc_clf      emp  pc_emp   
0        1        Alabama  1976   2632667  1499637    57.0  1398848    53.1  \
1        2         Alaska  1976    239917   164014    68.4   151501    63.1   
2        4        Arizona  1976   1650917   981368    59.4   885146    53.6   
3        5       Arkansas  1976   1546583   893588    57.8   831795    53.8   
4        6     California  1976  15823750  9894236    62.5  8985601    56.8   
...    ...            ...   ...       ...      ...     ...      ...     ...   
2486    51       Virginia  2022   6839542  4435858    64.9  4308805    63.0   
2487    53     Washington  2022   6223443  3990343    64.1  3822319    61.4   
2488    54  West Virginia  2022   1435928   785115    54.7   754453    52.5   
2489    55      Wisconsin  2022   4739794  3082128    65.0  2992049    63.1   
2490    56        Wyoming  2022    457895   291756    63.7   281343    61.4   

        unem  unem_rate  
0     100789        6.7  

In [11]:
state_year.groupby("year").size().to_frame().T

var_label = {
    "fips": "FIPS code",
    "state": "State or area",
    "year": "Year",
    "pop": "Civilian non-institutional population",
    "clf": "Total number of people in civilian labor force",
    "pc_clf": "Labor force participation rate (= labor force / population; Age: 16 years and over)",
    "emp": "Total number of people employed",
    "pc_emp": "Employment-population ratio (= employment / population; Age: 16 years and over)",
    "unem": "Total number of people unemployed",
    "unem_rate": "Unemployment rate (= unemployment / labor force; Age: 16 years and over)",
}

# Drop LA and NYC
state_year = state_year[~state_year["fips"].isin(["037", "51000"])]

print(state_year)

      fips          state  year       pop      clf  pc_clf      emp  pc_emp   
0        1        Alabama  1976   2632667  1499637    57.0  1398848    53.1  \
1        2         Alaska  1976    239917   164014    68.4   151501    63.1   
2        4        Arizona  1976   1650917   981368    59.4   885146    53.6   
3        5       Arkansas  1976   1546583   893588    57.8   831795    53.8   
4        6     California  1976  15823750  9894236    62.5  8985601    56.8   
...    ...            ...   ...       ...      ...     ...      ...     ...   
2486    51       Virginia  2022   6839542  4435858    64.9  4308805    63.0   
2487    53     Washington  2022   6223443  3990343    64.1  3822319    61.4   
2488    54  West Virginia  2022   1435928   785115    54.7   754453    52.5   
2489    55      Wisconsin  2022   4739794  3082128    65.0  2992049    63.1   
2490    56        Wyoming  2022    457895   291756    63.7   281343    61.4   

        unem  unem_rate  
0     100789        6.7  

In [12]:
# Save it!
state_year.to_csv("state_year.csv", index=False)

In [13]:
# Load state_year.csv data
df = pd.read_csv("state_year.csv")

# Convert data to long format
source = state_year.rename(columns={"fips": "FIPS", "unem_rate": "UnemploymentRate"})[["FIPS", "state", "year", "UnemploymentRate"]]
source['FIPS'] = source['FIPS'].astype(int)
source['FIPS'] = source['FIPS'].apply(lambda x: int(str(x).zfill(2)))
source = source[["FIPS", "state", "year", "UnemploymentRate"]]
usvi_data = pd.DataFrame({"FIPS": [78], "state": ["U.S. Virgin Islands"], "year": [np.nan], "UnemploymentRate": [np.nan]})
source = pd.concat([source, usvi_data], ignore_index=True)
source['year'] = source['year'].astype(str)

source

Unnamed: 0,FIPS,state,year,UnemploymentRate
0,1,Alabama,1976.0,6.7
1,2,Alaska,1976.0,7.6
2,4,Arizona,1976.0,9.8
3,5,Arkansas,1976.0,6.9
4,6,California,1976.0,9.2
...,...,...,...,...
2487,53,Washington,2022.0,4.2
2488,54,West Virginia,2022.0,3.9
2489,55,Wisconsin,2022.0,2.9
2490,56,Wyoming,2022.0,3.6


In [14]:
source.dtypes

FIPS                  int64
state                object
year                 object
UnemploymentRate    float64
dtype: object

In [15]:
# US states TopoJSON data
us_states = alt.topo_feature(data.us_10m.url, "states")

# Convert the 'year' field in merged_data to a string
source['year'] = source['year'].astype(str)

# Create year selector
year_selector = alt.binding_select(options=[str(y) for y in range(1976, 2023)], name="Select Year: ")
year_select = alt.selection_single(fields=["year"], bind=year_selector, init={"year": "1976"})

# Create a base chart with merged data
base = alt.Chart(us_states)

In [16]:
# Create year selector
year_selector = alt.binding_select(options=[str(y) for y in range(1976, 2023)], name="Select Year: ")
year_select = alt.selection_single(fields=["year"], bind=year_selector, init={"year": "1976"})

# Create a base chart with merged data
base = alt.Chart(us_states)

# Filter data by the selected year and perform a lookup
merged_data_filtered = base.transform_filter(year_select).transform_lookup(
    lookup="id",
    from_=alt.LookupData(source, "FIPS", ["state", "UnemploymentRate"])
)

# Create a map with state shapes based on the filtered merged data
state_map = merged_data_filtered.mark_geoshape().encode(
    color=alt.Color("UnemploymentRate:Q", scale=alt.Scale(scheme="reds")),
    tooltip=[
        alt.Tooltip("id:N", title="FIPS"),
        alt.Tooltip("state:N", title="State"),
        alt.Tooltip("UnemploymentRate:Q", title="Unemployment Rate")
    ],
).project(
    type="albersUsa"
).properties(
    title="US Unemployment Rates by State (1976-2022)",
    width=800,
    height=500
).add_selection(year_select)

state_map