# Analyze FiveThirtyEight Voter Power Index

In [1]:
import pandas as pd

## Load "Voter Power Index" scores

In [2]:
# Senate Voter Power Index scores from the FiveThirtyEight data directory;
# the preview below shows one row per USPS state code with a "vpi" value.
voter_power_index = pd.read_csv(
    "../data/fivethirtyeight/senate-voter-power-index.csv"
)

In [3]:
# Lookup from USPS two-letter abbreviation to upper-case state name.
# The upper-case spelling is what the later join matches against the
# "state" column of the Census table.
usps_state = dict([
    ("AZ", "ARIZONA"),
    ("CA", "CALIFORNIA"),
    ("CT", "CONNECTICUT"),
    ("DE", "DELAWARE"),
    ("FL", "FLORIDA"),
    ("HI", "HAWAII"),
    ("IN", "INDIANA"),
    ("MA", "MASSACHUSETTS"),
    ("MD", "MARYLAND"),
    ("ME", "MAINE"),
    ("MI", "MICHIGAN"),
    ("MN", "MINNESOTA"),
    ("MO", "MISSOURI"),
    ("MS", "MISSISSIPPI"),
    ("MT", "MONTANA"),
    ("ND", "NORTH DAKOTA"),
    ("NE", "NEBRASKA"),
    ("NJ", "NEW JERSEY"),
    ("NM", "NEW MEXICO"),
    ("NV", "NEVADA"),
    ("NY", "NEW YORK"),
    ("OH", "OHIO"),
    ("PA", "PENNSYLVANIA"),
    ("RI", "RHODE ISLAND"),
    ("TN", "TENNESSEE"),
    ("TX", "TEXAS"),
    ("UT", "UTAH"),
    ("VA", "VIRGINIA"),
    ("VT", "VERMONT"),
    ("WA", "WASHINGTON"),
    ("WI", "WISCONSIN"),
    ("WV", "WEST VIRGINIA"),
    ("WY", "WYOMING"),
])

In [4]:
# Translate each USPS code into the upper-case state name used as the join
# key later on. Series.map with a dict is the built-in lookup; a code that is
# missing from usps_state becomes NaN, which the null check that follows
# still catches.
voter_power_index["state_name"] = voter_power_index["state"].map(usps_state)

In [5]:
# Preview the first rows to eyeball the new state_name column next to its USPS code.
voter_power_index.head()

Unnamed: 0,state,vpi,state_name
0,AZ,3.830486,ARIZONA
1,CA,0.0,CALIFORNIA
2,CT,0.067179,CONNECTICUT
3,DE,0.078927,DELAWARE
4,FL,0.989972,FLORIDA


In [6]:
# Every VPI row must have resolved to a state name; a null here means the
# usps_state lookup has no entry for that row's abbreviation.
assert not voter_power_index["state_name"].isnull().any()

## Load state registered voter counts, by demographic

Note: Population and registered-voter counts are in *thousands*.

In [7]:
# Load the Census registration/voting table. The first 4 rows and the last
# 5 rows of the sheet are skipped (presumably titles and footnotes - confirm
# against the workbook), and "-" cells are read as missing values.
demo_by_state = pd.read_excel(
    "../data/census/table04b.xls",
    na_values=["-"],
    skiprows=4,
    skipfooter=5,
)

# Replace the spreadsheet's own header with short snake_case names.
# NOTE(review): "percent_voted_citizen" breaks the "pct_" prefix used by its
# sibling columns; it is left as-is so existing column references keep working.
demo_by_state.columns = [
    "state",
    "demographic",
    "total_pop",
    "total_citizen_pop",
    "total_registered",
    "pct_registered",
    "moe_registered",
    "pct_registered_citizen",
    "moe_registered_citizen",
    "total_voted",
    "pct_voted",
    "moe_voted",
    "percent_voted_citizen",
    "moe_voted_citizen",
]

In [8]:
# Inspect the raw load; in the preview below "state" is filled only on the
# first row and blank on the rows that follow it.
demo_by_state.head()

Unnamed: 0,state,demographic,total_pop,total_citizen_pop,total_registered,pct_registered,moe_registered,pct_registered_citizen,moe_registered_citizen,total_voted,pct_voted,moe_voted,percent_voted_citizen,moe_voted_citizen
0,US,Total,245502,224059,157596.0,64.2,0.3,70.3,0.3,137537.0,56.0,0.3,61.4,0.3
1,,Male,118488,107554,73761.0,62.3,0.4,68.6,0.4,63801.0,53.8,0.4,59.3,0.5
2,,Female,127013,116505,83835.0,66.0,0.4,72.0,0.4,73735.0,58.1,0.4,63.3,0.4
3,,White alone,192129,177865,127463.0,66.3,0.3,71.7,0.3,111891.0,58.2,0.3,62.9,0.4
4,,White non-Hispanic alone,157395,154450,114151.0,72.5,0.3,73.9,0.3,100849.0,64.1,0.4,65.3,0.4


In [9]:
# The sheet labels only the first row of each state's group, so carry that
# label down onto the demographic rows beneath it. Series.ffill() replaces
# fillna(method="ffill"), whose `method` argument is deprecated in recent
# pandas releases.
demo_by_state["state"] = demo_by_state["state"].ffill()

In [10]:
# Re-check after the forward fill: each previewed row should now carry a state label.
demo_by_state.head()

Unnamed: 0,state,demographic,total_pop,total_citizen_pop,total_registered,pct_registered,moe_registered,pct_registered_citizen,moe_registered_citizen,total_voted,pct_voted,moe_voted,percent_voted_citizen,moe_voted_citizen
0,US,Total,245502,224059,157596.0,64.2,0.3,70.3,0.3,137537.0,56.0,0.3,61.4,0.3
1,US,Male,118488,107554,73761.0,62.3,0.4,68.6,0.4,63801.0,53.8,0.4,59.3,0.5
2,US,Female,127013,116505,83835.0,66.0,0.4,72.0,0.4,73735.0,58.1,0.4,63.3,0.4
3,US,White alone,192129,177865,127463.0,66.3,0.3,71.7,0.3,111891.0,58.2,0.3,62.9,0.4
4,US,White non-Hispanic alone,157395,154450,114151.0,72.5,0.3,73.9,0.3,100849.0,64.1,0.4,65.3,0.4


## Join the two datasets

In [11]:
# Attach each state's VPI to every Census demographic row by matching the
# Census "state" label against the VPI "state_name". A left join followed by
# dropna on "vpi" discards rows with no VPI score while leaving the surviving
# rows with their positions from the full left-joined table as index labels
# (e.g. Arizona starts at 33 in the preview below). The VPI table's own
# "state" column comes through as "state_vpi".
joined = (
    demo_by_state
    .merge(
        voter_power_index,
        how="left",
        left_on="state",
        right_on="state_name",
        suffixes=("", "_vpi"),
    )
    .dropna(subset=["vpi"])
)

In [12]:
# Transpose the preview so the many columns read as rows.
joined.head().T

Unnamed: 0,33,34,35,36,37
state,ARIZONA,ARIZONA,ARIZONA,ARIZONA,ARIZONA
demographic,Total,Male,Female,White alone,White non-Hispanic alone
total_pop,5196,2525,2671,4471,2940
total_citizen_pop,4585,2256,2329,3950,2875
total_registered,3145,1485,1660,2773,2145
pct_registered,60.5,58.8,62.2,62,73
moe_registered,2.2,3.2,3.1,2.4,2.7
pct_registered_citizen,68.6,65.8,71.3,70.2,74.6
moe_registered_citizen,2.3,3.3,3.1,2.4,2.7
total_voted,2769,1273,1496,2480,1963


In [13]:
# Weight each group's registered-voter count by its state's VPI.
# total_registered is in thousands (see the note in the load section), so
# demo_power is on that same scale.
joined["demo_power"] = joined["vpi"].mul(joined["total_registered"])

In [14]:
# Keep only the specific demographic groups: drop the aggregate rows
# ("Total", "Male", "Female", "White alone") and every "... in combination"
# category, then rank by demo_power and index by (state, demographic).
trimmed = (
    joined
    .loc[
        ~(
            joined["demographic"].isin(["Total", "Male", "Female", "White alone"])
            | joined["demographic"].str.contains(r"combination")
        )
    ]
    .sort_values("demo_power", ascending=False)
    .set_index(["state", "demographic"])
    [["total_registered", "vpi", "demo_power"]]
)

trimmed.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_registered,vpi,demo_power
state,demographic,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TENNESSEE,White non-Hispanic alone,2601.0,4.70981,12250.21581
NORTH DAKOTA,White non-Hispanic alone,399.0,29.62529,11820.49071
MISSOURI,White non-Hispanic alone,2832.0,3.607548,10216.575936
NEVADA,White non-Hispanic alone,879.0,10.79997,9493.17363
TEXAS,White non-Hispanic alone,6822.0,1.309738,8935.032636
ARIZONA,White non-Hispanic alone,2145.0,3.830486,8216.39247
FLORIDA,White non-Hispanic alone,6432.0,0.989972,6367.50119
MISSISSIPPI,White non-Hispanic alone,1056.0,5.337465,5636.36304
INDIANA,White non-Hispanic alone,2832.0,1.238373,3507.072336
TEXAS,Hispanic (of any race),2654.0,1.309738,3476.044652


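*Note: "White non-Hispanic alone" in North Dakota is index 389, while "Black alone or in combination" in Mississippi is index 284. The comparison below uses Mississippi's "Black alone" row instead, because every "combination" category was filtered out of `trimmed`.*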

In [15]:
# How many times larger North Dakota's "White non-Hispanic alone" demo_power
# is than Mississippi's "Black alone" demo_power.
(
    trimmed.loc[("NORTH DAKOTA", "White non-Hispanic alone"), "demo_power"]
    / trimmed.loc[("MISSISSIPPI", "Black alone"), "demo_power"]
)

3.5041556404478604

---

---

---