## Setup
Import modules

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import matplotlib.colors as mcolors
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Polygon
import os
import wget
import openpyxl
import math

Optionally `wget` the shapefile for the US states

In [None]:
# Uncomment to download the state-boundary archive (a .zip file) from census.gov.
# NOTE(review): the read_file call later in this notebook opens 'cb_2022_us_state_500k'
# (no .zip suffix), so presumably the archive must be extracted to a folder of that
# name before that cell will run - confirm.
# wget.download("https://www2.census.gov/geo/tiger/GENZ2022/shp/cb_2022_us_state_500k.zip")

## State to state abbreviation
Read in the state abbreviations csv and create a dictionary for quick conversion

In [None]:
# Load the name/abbreviation table, then pair each lower-cased state name with
# its abbreviation so later lookups are a plain dict access.
df_abbrev = pd.read_csv("state_abbreviations.csv")
abbreviation = dict(
    zip(df_abbrev['State'].str.lower(), df_abbrev['Abbreviation'].values)
)

## Data Preparation
Read in the state data and filter it to a single year (2022) so there's only one value per state; Puerto Rico is excluded.

In [None]:
# Column of the state data that the map will visualise
VAL_COLUMN = 'below_poverty_line_percent'

df = pd.read_csv("../../udataset/state_data.csv")

# Keep only the 2022 rows and the two columns needed for plotting,
# then drop Puerto Rico and renumber the rows from zero.
df = df.loc[df['year'] == 2022, ['state', VAL_COLUMN]]
df = df.loc[df['state'] != 'puerto rico'].reset_index(drop=True)

Scale the values by 100 and round them to one decimal place for easier plotting

In [None]:
# Multiply by 100 and keep one decimal place.
# NOTE: re-running this cell scales the column again; re-run the data preparation cell first.
df[VAL_COLUMN] = df[VAL_COLUMN].mul(100).round(1)

Convert the state names into their abbreviations

In [None]:
# Replace every state name with its abbreviation; a name that is missing
# from the lookup raises KeyError rather than silently becoming NaN.
df['state'] = df['state'].map(abbreviation.__getitem__)

## Load the spatial data
Read in the geospatial data using `geopandas`

In [None]:
# Load the state boundaries from the local 'cb_2022_us_state_500k' path
gdf = gpd.read_file(filename='cb_2022_us_state_500k')

Merge the geospatial data with the state data

In [None]:
# Attach each state's value to its geometry by matching state abbreviations.
# The (default) inner join keeps only states that appear in both tables.
# NOTE: re-running this cell merges df into gdf a second time; re-run the read_file cell first.
gdf = gdf.merge(
    right=df,
    how='inner',
    left_on='STUSPS',
    right_on='state',
)

In [None]:
# Layout preview: draws the mainland states in a flat colour and adds two empty
# inset axes to show where Alaska and Hawaii will be placed.

# Create a re-projected "copy" of gdf. The "EPSG:2163" authority string replaces
# the {'init': 'epsg:2163'} dict, whose init syntax is deprecated and makes
# pyproj emit a FutureWarning.
visframe = gdf.to_crs("EPSG:2163")

# create figure and axes with Matplotlib for the main map
fig, ax = plt.subplots(1, figsize=(18, 14))
# remove the axis box from the main map
ax.axis('off')

# create map of all mainland states (everything except Hawaii and Alaska)
visframe[~visframe.state.isin(['HI', 'AK'])].plot(
    color='lightblue', linewidth=0.8, ax=ax, edgecolor='0.8'
)

# Inset axes are positioned as [left, bottom, width, height] in figure fractions.
# Nothing is plotted on them in this cell, so they render as empty boxes.
# Add Alaska axis
ak_ax = fig.add_axes([0.1, 0.17, 0.17, 0.16])

# Add Hawaii axis
hi_ax = fig.add_axes([.28, 0.20, 0.1, 0.1])

In [None]:
def makeColorColumn(gdf, variable, vmin, vmax, cmap="YlOrBr"):
    """Add a 'value_determined_color' column of hex colour strings to ``gdf``.

    Each value in ``gdf[variable]`` is normalised against ``vmin``/``vmax``
    (values outside that range are clipped to it) and looked up in ``cmap``.

    Parameters
    ----------
    gdf : DataFrame-like
        Frame to colour. It is modified in place: the
        'value_determined_color' column is added or overwritten.
    variable : str
        Name of the numeric column that drives the colour.
    vmin, vmax : float
        Lower and upper bounds of the colour scale. Pass the same bounds for
        every frame that should share one colour scale.
    cmap : str or matplotlib colormap, optional
        Colormap used for the lookup. Defaults to "YlOrBr", the colormap this
        function previously hard-coded, so existing calls behave as before.

    Returns
    -------
    The same ``gdf`` object that was passed in, with the colour column set.
    """
    norm = mcolors.Normalize(vmin=vmin, vmax=vmax, clip=True)
    mapper = plt.cm.ScalarMappable(norm=norm, cmap=cmap)
    # to_rgba yields an RGBA tuple; to_hex drops the alpha channel and returns '#rrggbb'
    gdf['value_determined_color'] = gdf[variable].apply(
        lambda value: mcolors.to_hex(mapper.to_rgba(value))
    )
    return gdf

In [None]:
# --- Colour scale -----------------------------------------------------------
# The colour-scale bounds are the observed minimum and maximum of the plotted column.
vmin, vmax = gdf[VAL_COLUMN].min(), gdf[VAL_COLUMN].max()
# Continuous colorscale "YlOrBr" from https://matplotlib.org/stable/tutorials/colors/colormaps.html
# NOTE: this name only drives the colorbar below. The state colours come from
# makeColorColumn, which is called here without a colormap argument and uses
# YlOrBr, so the legend matches the map only while this stays "YlOrBr".
colormap = "YlOrBr"
# make a column for value_determined_color in gdf
gdf = makeColorColumn(gdf, VAL_COLUMN, vmin, vmax)

# Alaska and Hawaii are drawn on their own inset axes. They were previously
# used without ever being defined (NameError on a fresh kernel). Slicing them
# from gdf *after* the colour column exists means they share the colour scale.
# NOTE(review): they are taken from gdf, not the re-projected visframe, because
# the clip polygons below look like longitude/latitude bounds - confirm gdf's CRS is in degrees.
alaska_gdf = gdf[gdf.state == 'AK'].copy()
hawaii_gdf = gdf[gdf.state == 'HI'].copy()

# create "visframe" as a re-projected gdf using EPSG 2163
# (authority string instead of the deprecated {'init': 'epsg:2163'} dict)
visframe = gdf.to_crs("EPSG:2163")

# --- Figure and legend ------------------------------------------------------
# create figure and axes for Matplotlib
fig, ax = plt.subplots(1, figsize=(18, 14))
# remove the axis box around the vis
ax.axis('off')

# add a title (TODO: replace the placeholder text)
ax.set_title('Title')

# add colorbar axes to the figure
# This will take some iterating to get it where you want it [l,b,w,h] right
# l:left, b:bottom, w:width, h:height; in normalized unit (0-1)
cbax = fig.add_axes([0.89, 0.21, 0.03, 0.31])

cbax.set_title('Color Bar')

# add color scale
sm = plt.cm.ScalarMappable(cmap=colormap,
                           norm=plt.Normalize(vmin=vmin, vmax=vmax))
# The mappable carries no data of its own; give it an empty array through the
# public API instead of assigning the private `_A` attribute.
sm.set_array([])
# reformat tick labels on legend: the values are already scaled by 100, so
# divide by 100 before applying the percent format (e.g. 12.0 -> '12%')
comma_fmt = FuncFormatter(lambda x, p: format(x / 100, '.0%'))
fig.colorbar(sm, cax=cbax, format=comma_fmt)
tick_font_size = 16
cbax.tick_params(labelsize=tick_font_size)

# --- Main map ---------------------------------------------------------------
# Note: we're going state by state here because of unusual coloring behavior when
# trying to plot the entire dataframe using the "value_determined_color" column.
# visframe was derived from gdf after the colour column was added, so each row
# already carries its own colour and no per-state re-filtering is needed.
mainland = visframe[~visframe.state.isin(['AK', 'HI'])]
for row in mainland.itertuples():
    mainland.loc[[row.Index]].plot(
        color=row.value_determined_color, linewidth=0.8, ax=ax, edgecolor='0.8'
    )

# --- Alaska inset -----------------------------------------------------------
akax = fig.add_axes([0.1, 0.17, 0.2, 0.19])
akax.axis('off')
# polygon to clip western islands
polygon = Polygon([(-170, 50), (-170, 72), (-140, 72), (-140, 50)])
alaska_clipped = alaska_gdf.clip(polygon)
# colours are read from the clipped frame so they always line up with its rows
alaska_clipped.plot(
    color=alaska_clipped['value_determined_color'], linewidth=0.8, ax=akax, edgecolor='0.8'
)

# --- Hawaii inset -----------------------------------------------------------
hiax = fig.add_axes([.28, 0.20, 0.1, 0.1])
hiax.axis('off')
# polygon to clip western islands
hipolygon = Polygon([(-160, 0), (-160, 90), (-120, 90), (-120, 0)])
hawaii_clipped = hawaii_gdf.clip(hipolygon)
# Only `color` is passed: supplying `column` as well makes geopandas warn and ignore it.
hawaii_clipped.plot(
    color=hawaii_clipped['value_determined_color'], linewidth=0.8, ax=hiax, edgecolor='0.8'
);
