# Household Income of Some US States (2017)

In [1]:
import numpy as np
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete
from lets_plot.geo_data import *

The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).


In [2]:
LetsPlot.setup_html()

In [3]:
state_codes_df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/us_state_codes.csv")
print(state_codes_df.shape)
state_codes_df.head(3)

(50, 2)


Unnamed: 0,state,code
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ


In [4]:
def get_pop_data():
    df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/midwest.csv")
    df.drop(columns=["Unnamed: 0"], inplace=True)
    return df.merge(state_codes_df, how="left", left_on="state", right_on="code", suffixes=["_code", "_name"])

pop_df = get_pop_data()
print(pop_df.shape)
print(pop_df.columns)
pop_df.head(3)

(437, 30)
Index(['PID', 'county', 'state_code', 'area', 'poptotal', 'popdensity',
       'popwhite', 'popblack', 'popamerindian', 'popasian', 'popother',
       'percwhite', 'percblack', 'percamerindan', 'percasian', 'percother',
       'popadults', 'perchsd', 'percollege', 'percprof', 'poppovertyknown',
       'percpovertyknown', 'percbelowpoverty', 'percchildbelowpovert',
       'percadultpoverty', 'percelderlypoverty', 'inmetro', 'category',
       'state_name', 'code'],
      dtype='object')


Unnamed: 0,PID,county,state_code,area,poptotal,popdensity,popwhite,popblack,popamerindian,popasian,...,poppovertyknown,percpovertyknown,percbelowpoverty,percchildbelowpovert,percadultpoverty,percelderlypoverty,inmetro,category,state_name,code
0,561,ADAMS,IL,0.052,66090,1270.96154,63917,1702,98,249,...,63628,96.274777,13.151443,18.011717,11.009776,12.443812,0,AAR,Illinois,IL
1,562,ALEXANDER,IL,0.014,10626,759.0,7054,3496,19,48,...,10529,99.087145,32.244278,45.826514,27.385647,25.228976,0,LHR,Illinois,IL
2,563,BOND,IL,0.022,14991,681.409091,14477,429,35,16,...,14235,94.956974,12.068844,14.036061,10.85209,12.69741,0,AAR,Illinois,IL


In [5]:
state_names = list(pop_df["state_name"].unique())
state_names

['Illinois', 'Indiana', 'Michigan', 'Ohio', 'Wisconsin']

In [6]:
def get_income_data():
    df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/US_household_income_2017.csv", encoding_errors='ignore')
    return df[df["State_Name"].isin(state_names) & df["Mean"] > 0].sort_values(by="Mean").reset_index(drop=True)

income_df = get_income_data()
print(income_df.shape)
print(income_df.columns)
income_df.head(3)

(2669, 19)
Index(['id', 'State_Code', 'State_Name', 'State_ab', 'County', 'City', 'Place',
       'Type', 'Primary', 'Zip_Code', 'Area_Code', 'ALand', 'AWater', 'Lat',
       'Lon', 'Mean', 'Median', 'Stdev', 'sum_w'],
      dtype='object')


Unnamed: 0,id,State_Code,State_Name,State_ab,County,City,Place,Type,Primary,Zip_Code,Area_Code,ALand,AWater,Lat,Lon,Mean,Median,Stdev,sum_w
0,55024108,55,Wisconsin,WI,Adams County,Milwaukee,Adams city,Track,Track,53233,414,762847,0,43.036953,-87.930389,13307,9414,14040,159.652773
1,18023575,18,Indiana,IN,Adams County,Hammond,Berne city,Track,Track,46320,219,1413079,0,41.619125,-87.517872,13375,9661,15154,513.090245
2,26029391,26,Michigan,MI,Alcona County,Detroit,Hubbard Lake,Track,Track,48211,313,1951619,0,42.390071,-83.077663,13609,8512,14142,196.313644


In [7]:
states_gdf = geocode_states(state_names).scope("US").get_boundaries()
states_gdf.head(3)

Unnamed: 0,state,found name,geometry
0,Illinois,Illinois,"MULTIPOLYGON (((-89.13301 36.98200, -89.25478 ..."
1,Indiana,Indiana,"MULTIPOLYGON (((-84.81993 39.10544, -84.89739 ..."
2,Michigan,Michigan,"MULTIPOLYGON (((-90.41862 46.56636, -89.95710 ..."


In [8]:
def get_scale_params(min_value, max_value, *, breaks_count=5, round=3, expand=1):
    min_deg, max_deg = np.log10(min_value), np.log10(max_value)
    return {
        'limits': [min_value - expand * 10**round, max_value + expand * 10**round],
        'breaks': [int(10**n / 10**round) * 10**round \
                   for n in np.linspace(min_deg, max_deg, breaks_count)],
    }

pie_lon, pie_lat = -81.5, 47.5

## v1

In [9]:
ggplot() + \
    geom_map(aes(paint_a="poptotal"), data=pop_df, map=states_gdf, map_join=["state_name", "state"], \
             color="black", size=1.2, fill_by='paint_a', tooltips='none') + \
    geom_point(aes("Lon", "Lat", paint_b="Mean", size="Mean"), data=income_df, \
               shape=21, alpha=.5, color="black", fill_by='paint_b', tooltips='none') + \
    geom_pie(aes(group="State_Name", slice="Mean", paint_c="Mean"), \
             data=income_df.groupby("State_Name")["Mean"].mean().sort_values(ascending=False).to_frame().reset_index(), \
             x=pie_lon, y=pie_lat, stat='identity', \
             size=19, stroke=2, hole=.2, \
             color="black", spacer_color="black", fill_by='paint_c', \
             labels=layer_labels().line("@State_Name"), tooltips='none') + \
    geom_text(x=pie_lon, y=pie_lat, label="Mean income by state", position=position_nudge(y=1.9), fontface='bold') + \
    scale_gradient('paint_a', name="Population", low="white", high="black", \
                   trans='log10', format="d", **get_scale_params(20_000, 80_000)) + \
    scale_brewer('paint_b', name="Mean income by household", palette="YlGnBu", \
                 **get_scale_params(income_df["Mean"].min(), income_df["Mean"].max())) + \
    scale_viridis('paint_c', name="Mean income by state", option='magma', \
                  format="d", direction=-1, **get_scale_params(58_000, 78_000, expand=3)) + \
    scale_size(range=[1, 10], guide='none') + \
    coord_map(xlim=[-93, -78], ylim=[36, 50]) + \
    ggsize(800, 800) + \
    ggtitle("Household income of some US states (2017)") + \
    theme_void() + \
    theme(plot_title=element_text(size=20, face='bold'), label_text=element_text(size=12))

## v2

In [10]:
states_centroids_gdf = geocode_states(state_names).scope("US").get_centroids()
states_centroids_gdf.head(3)

Unnamed: 0,state,found name,geometry
0,Illinois,Illinois,POINT (-89.45132 39.73878)
1,Indiana,Indiana,POINT (-86.17345 39.76253)
2,Michigan,Michigan,POINT (-84.50681 45.00341)


In [11]:
ggplot() + \
    geom_map(aes(paint_a="poptotal"), data=pop_df, map=states_gdf, map_join=["state_name", "state"], \
             color="black", size=1.2, fill_by='paint_a', \
             tooltips=layer_tooltips().line("@state_name")) + \
    geom_point(aes("Lon", "Lat", paint_b="Mean", size="Mean"), data=income_df, \
               shape=21, alpha=.5, color="black", fill_by='paint_b', tooltips='none') + \
    geom_pie(aes(paint_c="Mean", slice="Mean"), stat='identity', \
             data=income_df.groupby(["State_Name", "Type"])["Mean"].sum()\
                           .sort_values(ascending=False).to_frame().reset_index(), \
             map=states_centroids_gdf, map_join=["State_Name", "state"], \
             fill_by='paint_c', hole=.5, alpha=.9, \
             tooltips=layer_tooltips().title("@State_Name").line("@|@Type").line("Weighted sum|@Mean")) + \
    scale_gradient('paint_a', name="Population", low="white", high="black", \
                   trans='log10', format="d", **get_scale_params(20_000, 80_000)) + \
    scale_brewer('paint_b', name="Mean income by household", palette="YlGnBu", \
                 **get_scale_params(income_df["Mean"].min(), income_df["Mean"].max())) + \
    scale_viridis('paint_c', name="Income by state and type", option='magma', \
                  format="d", direction=-1, trans='log10', **get_scale_params(10**5, 10**8, round=5)) + \
    scale_size(range=[1, 10], guide='none') + \
    coord_map(xlim=[-93, -80], ylim=[36, 49]) + \
    ggsize(800, 800) + \
    ggtitle("Household income of some US states (2017)") + \
    theme_void() + \
    theme(plot_title=element_text(size=20, face='bold'))