# U.S. Census apportionment data in 2010 and 2020

In [1]:
%load_ext lab_black

In [36]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt

%matplotlib inline

In [37]:
from vega_datasets import data

# TopoJSON reference to the "states" feature of the vega_datasets US 10m file.
# NOTE(review): `states` is not referenced by any later cell visible here — confirm it is still needed.
states = alt.topo_feature(url=data.us_10m.url, feature="states")

In [3]:
# Chart theme: register the LA Times theme and make it the active one.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Wider/longer DataFrame previews.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Lift Altair's row-count limit on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get the data from the U.S. Census

In [4]:
# Source page for the 2010 apportionment tables:
# https://www.census.gov/data/tables/2010/dec/2010-apportionment-data.html

In [144]:
# Direct download link for the 2010 apportionment Table 1 workbook (.xls).
url2010 = (
    "https://www2.census.gov/programs-surveys/decennial/2010/data/apportionment/"
    "apport2010-table1.xls"
)

In [145]:
# Read the 2010 workbook, skipping the first 10 rows and the last 6 rows of the sheet.
# `encoding` is intentionally not passed: read_excel does not accept it on
# pandas >= 1.1 (TypeError), and Excel files carry their own text encoding.
df_2010 = pd.read_excel(url2010, skiprows=10, skipfooter=6)

In [146]:
# Short names for the six columns read from the sheet; "drop1"/"drop2" mark
# columns that are discarded in the next step.
columns_2010 = ["state", "population", "drop1", "seats", "drop2", "seats_change"]
df_2010.columns = columns_2010

In [147]:
# Remove the two placeholder columns, keeping state, population, seats, seats_change.
df_2010 = df_2010.drop(columns=["drop1", "drop2"])

In [148]:
# Preview the last ten rows to check that the footer rows were trimmed.
df_2010.iloc[-10:]

Unnamed: 0,state,population,seats,seats_change
40,South Dakota,819761,1,0
41,Tennessee,6375431,9,0
42,Texas,25268418,36,4
43,Utah,2770765,4,1
44,Vermont,630337,1,0
45,Virginia,8037736,11,0
46,Washington,6753369,10,1
47,West Virginia,1859815,3,0
48,Wisconsin,5698230,8,0
49,Wyoming,568300,1,0


In [149]:
# Tag every row with its census year (stored as a string).
df_2010 = df_2010.assign(year="2010")

In [150]:
# Direct download link for the 2020 apportionment Table 1 workbook (.xlsx).
url2020 = (
    "https://www2.census.gov/programs-surveys/decennial/2020/data/apportionment/"
    "apportionment-2020-table01.xlsx"
)

In [192]:
# Read the 2020 workbook, skipping the first 3 rows and the last 2 rows of the sheet.
# `encoding` is intentionally not passed: read_excel does not accept it on
# pandas >= 1.1 (TypeError), and Excel files carry their own text encoding.
df_2020 = pd.read_excel(url2020, skiprows=3, skipfooter=2)

In [193]:
# Preview the last five rows to check that the footer rows were trimmed.
df_2020.tail(5)

Unnamed: 0,STATE,"APPORTIONMENT POPULATION \n(APRIL 1, 2020)",NUMBER OF APPORTIONED REPRESENTATIVES BASED ON \n2020 CENSUS2,CHANGE FROM \n2010 CENSUS APPORTIONMENT
45,Virginia,8654542,11,0
46,Washington,7715946,10,0
47,West Virginia,1795045,2,-1
48,Wisconsin,5897473,8,0
49,Wyoming,577719,1,0


In [194]:
# Replace the long Census headers with the same short names used for 2010.
columns_2020 = ["state", "population", "seats", "seats_change"]
df_2020.columns = columns_2020

In [195]:
# Tag every row with its census year (stored as a string).
df_2020 = df_2020.assign(year="2020")

In [196]:
# Stack the 2010 and 2020 tables into one long frame (one row per state per year).
# ignore_index=True gives the result a fresh 0..n-1 index; without it each input's
# own row labels are kept, so every label would appear twice.
df = pd.concat([df_2010, df_2020], ignore_index=True)

In [197]:
# Preview the cleaned 2020 table; show only the first ten rows rather than
# dumping the whole frame into the notebook output.
df_2020.head(10)

Unnamed: 0,state,population,seats,seats_change,year
0,Alabama,5030053,7,0,2020
1,Alaska,736081,1,0,2020
2,Arizona,7158923,9,0,2020
3,Arkansas,3013756,4,0,2020
4,California,39576757,52,-1,2020
5,Colorado,5782171,8,1,2020
6,Connecticut,3608298,5,0,2020
7,Delaware,990837,1,0,2020
8,Florida,21570527,28,1,2020
9,Georgia,10725274,14,0,2020


---

### Historical apportionment

In [168]:
# Historical apportionment workbook (table C2); the first 4 rows of the sheet are skipped.
url_hist = "https://www2.census.gov/programs-surveys/decennial/2020/data/apportionment/apportionment-2020-tableC2.xlsx"
hist_df = pd.read_excel(url_hist, skiprows=4)

In [171]:
# Preview the first five rows of the historical table as loaded.
hist_df.head(5)

Unnamed: 0.1,Unnamed: 0,2020: Apportionment population,2020: \nNumber of representatives,2020: \nSeat change,2020: Average persons per representative,2010: Apportionment population,2010: \nNumber of representatives,2010: \nSeat change,2010: Average persons per representative,Unnamed: 9,2000: Apportionment population,2000: \nNumber of representatives,2000: \nSeat change,2000: Average persons per representative,1990: Apportionment population,1990: \nNumber of representatives,1990: \nSeat change,1990: Average persons per representative,Unnamed: 18,1980: Apportionment population,1980: \nNumber of representatives,1980: \nSeat change,1980: Average persons per representative,1970: Apportionment population,1970: \nNumber of representatives,...,1960: Apportionment population,1960: \nNumber of representatives,1960: \nSeat change,1960: Average persons per representative,1950: Apportionment population,1950: \nNumber of representatives,1950: \nSeat change,1950: Average persons per representative,Unnamed: 36,1940: Apportionment population,1940: \nNumber of representatives,1940: \nSeat change,1940: Average persons per representative,1930: Apportionment population,1930: \nNumber of representatives,1930: \nSeat change,1930: Average persons per representative,Unnamed: 45,1920: Apportionment population,1920: \nNumber of representatives,1920: Average persons per representative,1910: Apportionment population,1910: \nNumber of representatives,1910: \nSeat change\n(since 1900),1910: Average persons per representative
0,Alabama,5030053.0,7.0,0.0,718579.0,4802982.0,7.0,0.0,686140.0,Alabama,4461130.0,7.0,0.0,637304.0,4062608.0,7.0,0.0,580373.0,Alabama,3890061.0,7.0,0.0,555723.0,3475885.0,7.0,...,3266740.0,8.0,-1,408343.0,3061743,9,0,340194,Alabama,2832961,9,0,314773,2646242,9,-1,294027,Alabama,2348174,10,234817,2138093,10,1,213809
1,Alaska,736081.0,1.0,0.0,736081.0,721523.0,1.0,0.0,721523.0,Alaska,628933.0,1.0,0.0,628933.0,551947.0,1.0,0.0,551947.0,Alaska,400481.0,1.0,0.0,400481.0,304067.0,1.0,...,226167.0,1.0,X,226167.0,X,X,X,X,Alaska,X,X,X,X,X,X,X,X,Alaska,X,X,X,X,X,X,X
2,Arizona,7158923.0,9.0,0.0,795436.0,6412700.0,9.0,1.0,712522.0,Arizona,5140683.0,8.0,2.0,642585.0,3677985.0,6.0,1.0,612998.0,Arizona,2717866.0,5.0,1.0,543573.0,1787620.0,4.0,...,1302161.0,3.0,1,434054.0,749587,2,0,374794,Arizona,499261,2,1,249631,389375,1,0,389375,Arizona,309495,1,309495,X,X,X,X
3,Arkansas,3013756.0,4.0,0.0,753439.0,2926229.0,4.0,0.0,731557.0,Arkansas,2679733.0,4.0,0.0,669933.0,2362239.0,4.0,0.0,590560.0,Arkansas,2285513.0,4.0,0.0,571378.0,1942303.0,4.0,...,1786272.0,4.0,-2,446568.0,1909511,6,-1,318252,Arkansas,1949387,7,0,278484,1854444,7,0,264921,Arkansas,1752204,7,250315,1574449,7,0,224921
4,California,39576757.0,52.0,-1.0,761091.0,37341989.0,53.0,0.0,704566.0,California,33930798.0,53.0,1.0,640204.0,29839250.0,52.0,7.0,573832.0,California,23668562.0,45.0,2.0,525968.0,20098863.0,43.0,...,15717204.0,38.0,8,413611.0,10586223,30,7,352874,California,6907387,23,3,300321,5668241,20,9,283412,California,3426031,11,311457,2376561,11,3,216051


In [122]:
# Rename the historical table's columns to ten short snake_case names.
# NOTE(review): the `hist_df.head()` output shown just above (table C2) has far
# more than ten columns, and this cell's execution count (122) predates that load
# (168). Presumably these names were written for a different, long-format table
# with one row per state and year — confirm the intended source. As written,
# assigning ten labels to a wider frame fails with a length-mismatch error on a
# fresh top-to-bottom run.
hist_df.columns = [
    "name",
    "type",
    "year",
    "population",
    "pop_pct_change",
    "density",
    "density_rank",
    "reps",
    "change_reps",
    "pop_per_rep",
]

In [123]:
# Preview the first five rows after the columns were renamed.
hist_df.head(5)

Unnamed: 0,name,type,year,population,pop_pct_change,density,density_rank,reps,change_reps,pop_per_rep
0,Alabama,State,1910,2138093,16.9,42.2,25.0,10.0,1.0,213809.0
1,Alaska,State,1910,64356,1.2,0.1,52.0,,,
2,Arizona,State,1910,204354,66.2,1.8,49.0,,,
3,Arkansas,State,1910,1574449,20.0,30.3,30.0,7.0,0.0,224921.0
4,California,State,1910,2377549,60.1,15.3,38.0,11.0,3.0,216051.0


In [126]:
# Independent copy of the California rows, so later edits do not touch hist_df.
is_california = hist_df["name"] == "California"
hist_df_ca = hist_df.loc[is_california].copy()

In [127]:
# Strip comma separators from the population values and cast them to int.
# Casting to str first keeps this cell re-runnable: after one run the column is
# already integer-typed and would no longer expose the `.str` accessor.
hist_df_ca["population"] = (
    hist_df_ca["population"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(int)
)

In [130]:
# Bar per census year showing California's change in House seats.
bars = alt.Chart(hist_df_ca).encode(x="year:O", y="change_reps:Q").mark_bar()

# Value labels reuse the bar encodings. dy=-10 shifts each label 10px upward
# from its anchor (dx=0 leaves it unshifted horizontally).
text = bars.mark_text(
    dx=0,
    dy=-10,
    baseline="middle",
    align="left",
).encode(text="change_reps:Q")

(bars + text).properties(height=400)

In [129]:
# Line chart of California's population across census years.
lines = alt.Chart(hist_df_ca).encode(x="year:O", y="population:Q").mark_line()

lines.properties(height=400)

---

## Geography

### States map

In [172]:
# Load the state boundaries and lowercase every column name so later cells can
# refer to them in a single case.
state_geo = gpd.read_file("raw/states.geojson")
state_geo.columns = state_geo.columns.map(str.lower)

### Add A.P. states

In [173]:
# Lookup table joined to the geometry below on its `usps` column.
# Presumably it carries the AP-style state abbreviations (`ap`) — verify against the file.
ap_states = pd.read_csv(filepath_or_buffer="raw/ap_states.csv")

In [174]:
# Attach the AP lookup to each state geometry by postal code.
# The join is inner (pandas' default), so rows without a match on either side are dropped.
states_merge = state_geo.merge(
    ap_states,
    how="inner",
    left_on="stusps",
    right_on="usps",
)

### Clean up

In [175]:
# Discard merge leftovers and Census attribute columns that are not exported.
# Non-inplace with errors="ignore" so the cell can be re-run safely: the original
# in-place drop raised KeyError on a second run because the columns were already gone.
states_merge = states_merge.drop(
    columns=[
        "name_x",
        "stusps",
        "statefp",
        "statens",
        "affgeoid",
        "lsad",
        "aland",
        "awater",
    ],
    errors="ignore",
)

In [176]:
# The merge suffixed the right-hand name column as "name_y"; restore the plain name.
states_merge = states_merge.rename(columns={"name_y": "name"})

In [177]:
# Keep only the export columns, in their final order.
export_columns = ["geoid", "name", "usps", "ap", "geometry"]
states_merge = states_merge[export_columns]

In [178]:
# Preview the first five rows of the cleaned geography table.
states_merge.head(5)

Unnamed: 0,geoid,name,usps,ap,geometry
0,23,Maine,ME,Maine,"MULTIPOLYGON (((-68.92401 43.88541, -68.87478 ..."
1,15,Hawaii,HI,Hawaii,"MULTIPOLYGON (((-156.04965 19.78045, -156.0062..."
2,4,Arizona,AZ,Ariz.,"MULTIPOLYGON (((-114.79968 32.59362, -114.8093..."
3,5,Arkansas,AR,Ark.,"MULTIPOLYGON (((-94.61792 36.49941, -94.36120 ..."
4,10,Delaware,DE,Del.,"MULTIPOLYGON (((-75.77379 39.72220, -75.75323 ..."


---

## Export 

In [179]:
# Write the cleaned 2010 table to CSV without the row index.
df_2010.to_csv(
    path_or_buf="processed/apportionment/apportionment_2010.csv",
    index=False,
)

In [198]:
# Write the cleaned 2020 table to CSV without the row index.
df_2020.to_csv(
    path_or_buf="processed/apportionment/apportionment_2020.csv",
    index=False,
)

In [181]:
# Write the combined 2010 + 2020 table to CSV without the row index.
df.to_csv(
    path_or_buf="processed/apportionment/apportionment_2010_2020.csv",
    index=False,
)

In [92]:
# Save the state geometries with AP names as GeoJSON.
# NOTE(review): this writes three directories above the notebook, unlike the CSV
# exports, which go to processed/apportionment/ — confirm the intended location.
states_merge.to_file(
    filename="../../../data/GIS/states_geo_ap.geojson",
    driver="GeoJSON",
)