In [None]:
import pandas as pd
import altair as alt
from vega_datasets import data

# Disable the row limit in Altair. By default Altair refuses to embed
# datasets above a fixed row cap (raising MaxRowsError); a pandas DataFrame
# is embedded inline in the chart spec further down, so the cap is lifted here.
alt.data_transformers.disable_max_rows()

# Load and prepare the dataset
data_path = 'unemployment_by_state.csv'
df = pd.read_csv(data_path)

# The unemployment-rate column is referenced several times; name it once.
rate_col = 'Percent (%) of Labor Force Unemployed in State/Area'

# Strip thousands separators (commas) and convert the rate to float.
# Surrounding whitespace in string cells is tolerated by the float conversion.
df[rate_col] = df[rate_col].replace('[,]', '', regex=True).astype(float)

df['Year'] = df['Year'].astype(int)

# Filter out sub-state (county/city) entries.
# FIPS 37 is also North Carolina's state code, so dropping by code alone
# would remove that state from the map as well. Keep the row whose name is
# the state itself and drop any other area sharing the code.
# NOTE(review): assumes the state row is labelled exactly 'North Carolina' in
# 'State/Area' -- confirm against the CSV.
fips = df['FIPS Code']
is_substate_area = (fips == 51000) | (
    (fips == 37) & (df['State/Area'] != 'North Carolina')
)
# .copy() so the column assignment below writes to an independent frame
# instead of a slice of the original (avoids SettingWithCopyWarning).
df = df[~is_substate_area].copy()
df['FIPS Code'] = df['FIPS Code'].astype(int).astype(str)  # Convert FIPS to string without padding

# Aggregate data by year and state: mean rate per (state, year)
df_yearly = df.groupby(
    ['FIPS Code', 'State/Area', 'Year'], as_index=False
)[rate_col].mean()

# State outlines: the 'states' feature of the vega_datasets US TopoJSON
us_states = alt.topo_feature(data.us_10m.url, 'states')

# Year range covered by the aggregated data, computed once
year_values = df_yearly['Year']
first_year = year_values.min()
last_year = year_values.max()

# Slider widget spanning the available years in whole-year steps
year_slider = alt.binding_range(min=first_year, max=last_year, step=1)

# Single-value selection bound to the slider, starting at the earliest year.
# NOTE(review): Altair 5 deprecates selection_single/init in favour of
# selection_point/value; kept as-is to stay compatible with add_selection.
year_select = alt.selection_single(
    name="Year",
    fields=['Year'],
    bind=year_slider,
    init={'Year': first_year},
)

# Background layer: every state drawn in light gray with white borders,
# sized 800x500 and projected with albersUsa.
background = (
    alt.Chart(us_states)
    .mark_geoshape(fill='lightgray', stroke='white')
    .project('albersUsa')
    .properties(width=800, height=500)
)

# Map layer with unemployment data.
#
# The chart is driven by df_yearly (one row per state per year) and the state
# outline is looked up from the TopoJSON, not the other way round. A lookup
# attaches only a single matching row per key, so looking the yearly table up
# from the geometry would pin each state to one year and the year filter
# would then hide it for every other slider position.
map_chart = alt.Chart(df_yearly).mark_geoshape().encode(
    shape='geo:G',  # Geometry attached by the lookup below
    color=alt.Color(
        'Percent (%) of Labor Force Unemployed in State/Area:Q',
        scale=alt.Scale(scheme='reds'),
        title='Unemployment Rate (%)'
    ),
    tooltip=[
        alt.Tooltip('State/Area:N', title='State'),
        alt.Tooltip('Percent (%) of Labor Force Unemployed in State/Area:Q', title='Unemployment Rate (%)'),
        alt.Tooltip('Year:Q', title='Year')
    ]
).transform_filter(
    year_select  # Keep only the selected year before attaching geometry
).transform_lookup(
    lookup='FIPS Code',  # Unpadded FIPS code, matched against the TopoJSON 'id'
    from_=alt.LookupData(us_states, key='id'),
    as_='geo'
).properties(
    title='Unemployment Rate by U.S. State Over Time'
).project('albersUsa')  # Explicit, so this layer uses the same projection as the background

# Stack the gray background under the coloured data layer, then attach the
# year selection so its slider is rendered with the chart.
layered = alt.layer(background, map_chart)
chart = layered.add_selection(year_select)

# Bare expression: displays the chart as the notebook cell output
chart