In [1]:
import json

import pandas as pd
import requests
from bokeh.io import show, output_file
from bokeh.layouts import row
from bokeh.models import (
    ColorBar, LinearColorMapper, GeoJSONDataSource, HoverTool, LogColorMapper
)
from bokeh.models import ColumnDataSource, NumeralTickFormatter
from bokeh.palettes import Viridis256
from bokeh.palettes import Viridis256 as palette
from bokeh.plotting import figure


In [2]:

# Step 1: Obtain Census Data
# Query string asks for P1_001N and NAME for every state.
census_url = "https://api.census.gov/data/2020/dec/pl?get=P1_001N,NAME&for=state:*"

# Bound the wait so a stalled connection cannot hang the kernel, and fail
# loudly on an HTTP error instead of trying to parse an error page as JSON.
response = requests.get(census_url, timeout=30)
response.raise_for_status()
data = response.json()

# Convert the data into a pandas DataFrame:
# the first row of the payload is used as the header, the rest as records.
columns = data[0]
rows = data[1:]
df = pd.DataFrame(rows, columns=columns)

df.head(5)



Unnamed: 0,P1_001N,NAME,state
0,13002700,Pennsylvania,42
1,39538223,California,6
2,1793716,West Virginia,54
3,3271616,Utah,49
4,20201249,New York,36


In [3]:

# Rename columns for clarity.
# rename() returns a new frame (no inplace mutation); names that are already
# renamed are ignored, so re-running this cell is safe.
df = df.rename(columns={'P1_001N': 'population', 'NAME': 'state_name'})

# Convert population to integer so min/max and sorting are numeric.
df['population'] = df['population'].astype(int)

# Only trim stray whitespace -- do NOT title-case the names.
# str.title() capitalizes every word, turning "District of Columbia" into
# "District Of Columbia". The GeoJSON join in Step 2 matches names exactly,
# so that state would silently fall back to a population of 0 on the map.
df['state_name'] = df['state_name'].str.strip()

df.head(5)

Unnamed: 0,population,state_name,state
0,13002700,Pennsylvania,42
1,39538223,California,6
2,1793716,West Virginia,54
3,3271616,Utah,49
4,20201249,New York,36


In [4]:

# Step 2: Load GeoJSON Data for US States
geojson_url = 'https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json'

# Bound the wait and surface HTTP errors before attempting to parse JSON.
geojson_response = requests.get(geojson_url, timeout=30)
geojson_response.raise_for_status()
us_states_geo = geojson_response.json()

# Remove Alaska, Hawaii, and Puerto Rico if desired
states_to_exclude = ['Alaska', 'Hawaii', 'Puerto Rico']
us_states_geo['features'] = [
    feature for feature in us_states_geo['features']
    if feature['properties']['name'] not in states_to_exclude
]

# Create a mapping from state names to populations
state_populations = df.set_index('state_name')['population'].to_dict()

# Join on a normalized key (trimmed, case-folded) so that differences in
# capitalization between the two sources -- e.g. "District Of Columbia" vs
# "District of Columbia" -- do not silently drop a state.
populations_by_key = {
    str(name).strip().casefold(): count
    for name, count in state_populations.items()
}

# Add population data to GeoJSON properties
unmatched_states = []
for feature in us_states_geo['features']:
    state_name = feature['properties']['name']
    population = populations_by_key.get(state_name.strip().casefold())
    if population is None:
        # Keep the original fallback value, but remember the miss so it is
        # reported below instead of passing unnoticed.
        unmatched_states.append(state_name)
        population = 0
    feature['properties']['population'] = population

if unmatched_states:
    print(f"Warning: no census population found for: {unmatched_states}")

# Convert GeoJSON to a GeoJSONDataSource
geosource = GeoJSONDataSource(geojson=json.dumps(us_states_geo))



In [5]:

# Step 3: Create the Bokeh Visualization
# Build the reversed palette from the untouched Viridis256 import rather than
# from `palette` itself: `palette = reversed(palette)` flips the colors again
# on every re-run of this cell, so the map would alternate between orientations.
palette = tuple(reversed(Viridis256))

# Choose between Linear or Log Color Mapper
use_log_color_mapper = False  # Set to True if you prefer logarithmic scaling

# Both mappers take the same arguments, so pick the class and build it once.
mapper_cls = LogColorMapper if use_log_color_mapper else LinearColorMapper
color_mapper = mapper_cls(
    palette=palette,
    low=df['population'].min(),
    high=df['population'].max()
)

# Create the figure
p = figure(
    title="2020 U.S. Census Population by State",
    toolbar_location="left",
    tools="pan,wheel_zoom,reset",
    width=800,
    height=500,
    match_aspect=True
)

# Add patches representing states; fill color is driven by each feature's
# `population` property through the color mapper.
p.patches(
    'xs',
    'ys',
    source=geosource,
    fill_color={'field': 'population', 'transform': color_mapper},
    fill_alpha=0.7,
    line_color="white",
    line_width=0.5
)

# Add a hover tool ("{,}" formats the population with thousands separators)
p.add_tools(HoverTool(tooltips=[
    ("State", "@name"),
    ("Population", "@population{,}"),
]))

# Add a color bar without specifying the ticker
color_bar = ColorBar(
    color_mapper=color_mapper,
    location=(0, 0),
    label_standoff=12,
    border_line_color=None
)
p.add_layout(color_bar, 'right')

# Output the visualization
output_file("census_population.html")
show(p)

In [14]:
### U.S. State Population Distribution and Top 10 (2020 Census) ###
# All names used here are imported in the notebook's first (imports) cell.

# Rebuild fresh GeoJSONDataSource
geosource_new = GeoJSONDataSource(geojson=json.dumps(us_states_geo))

# Derive the reversed palette locally from the untouched Viridis256 import.
# Reading the notebook-level `palette` would make the colors depend on how
# many times the earlier cell that reverses it has been executed.
palette_reversed = tuple(reversed(Viridis256))

# Fresh Color Mapper
color_mapper_new = LinearColorMapper(
    palette=palette_reversed,
    low=df['population'].min(),
    high=df['population'].max()
)

# Map figure
p_map = figure(
    title="2020 U.S. Census Population by State",
    toolbar_location="left",
    tools="pan,wheel_zoom,reset,save",
    width=800,
    height=500,
    match_aspect=True
)
p_map.patches(
    'xs', 'ys',
    source=geosource_new,
    fill_color={'field': 'population', 'transform': color_mapper_new},
    fill_alpha=0.7,
    line_color="white",
    line_width=0.5
)
p_map.add_tools(HoverTool(tooltips=[("State", "@name"), ("Population", "@population{,}")]))
p_map.add_layout(
    ColorBar(
        color_mapper=color_mapper_new,
        location=(0, 0),
        label_standoff=12,
        border_line_color=None
    ),
    'right'
)

# Top 10 bar chart
df_sorted = df.sort_values('population', ascending=False)
top10 = df_sorted.head(10).copy()

# `population_label` is a pre-formatted string (thousands separators) for the
# hover tooltip.
src_top10 = ColumnDataSource({
    'state': top10['state_name'],
    'population': top10['population'],
    'population_label': top10['population'].map(lambda x: f"{x:,}")
})

# The categorical y-range is given in reverse order of the sorted frame,
# so the most populous state ends up at the top of the chart.
y_labels = list(top10['state_name'])[::-1]
top10_plot = figure(
    height=500, width=480, y_range=y_labels,
    x_axis_label="Population", title="Top 10 States by Population",
    tools="pan,wheel_zoom,reset,save", toolbar_location="right"
)
top10_plot.hbar(y='state', right='population', height=0.7, source=src_top10)
top10_plot.add_tools(HoverTool(tooltips=[("State", "@state"), ("Population", "@population_label")]))
top10_plot.xaxis.formatter = NumeralTickFormatter(format="0,0")

# Combined layout
layout = row(p_map, top10_plot, sizing_mode="scale_width")

output_file("census_population_with_top10.html")
show(layout)