# Scrape the "Voter Power Index" From 538

This notebook scrapes the "Voter Power Index" from 538 (FiveThirtyEight) as published at the moment the notebook is run, and writes the result as a CSV file in the `data/fivethirtyeight` directory.

In [1]:
import requests
import lxml.html
import cssselect
import pandas as pd

## Fetch 538 Election Forecast

In [2]:
# Address of FiveThirtyEight's 2016 election forecast page, which is the page this notebook downloads.
URL = "http://projects.fivethirtyeight.com/2016-election-forecast/"

In [3]:
# Download the forecast page. requests applies no timeout by default, so an explicit one
# keeps a stalled connection from hanging the kernel indefinitely; raise_for_status()
# surfaces HTTP errors (4xx/5xx) here instead of letting an error page be parsed later.
res = requests.get(URL, timeout=30)
res.raise_for_status()

In [4]:
# Parse the raw response bytes into an lxml HTML element that can be queried with CSS selectors.
dom = lxml.html.fromstring(res.content)

## Extract "Voter Power Index"

In [5]:
# Take the second <table> carrying the "tippingpointroi" class (index 1).
# NOTE(review): presumably the first match is a different table and this one holds the
# Voter Power Index — confirm against the page markup; this raises IndexError if fewer
# than two such tables are present.
table = dom.cssselect("table.tippingpointroi")[1]

In [6]:
# Build one record per table row: the state's abbreviation (from the row's
# "data-state" attribute), its display name (first cell) and its power value (second cell).
states = []
for tr in table.cssselect("tr"):
    state_code = tr.attrib["data-state"]
    tds = tr.cssselect("td")
    name_text = tds[0].text_content().strip()
    power_text = tds[1].text_content().strip()
    states.append({
        "state_raw": name_text,
        # The page shows "<0.1" for very small values; record those as 0.
        "power": 0 if power_text == "<0.1" else float(power_text),
        "abbrev": state_code,
    })

In [7]:
# Turn the list of per-row dicts into a DataFrame with columns state_raw, power and abbrev.
voter_power_index = pd.DataFrame(states)

In [8]:
def clean_state_names(state_str):
    """Normalize a scraped state label to an upper-case state name.

    Congressional-district labels for Maine and Nebraska map to None so the
    caller can identify the non-statewide rows. The "<state> - statewide"
    labels collapse to the bare state name. Every other label is returned
    upper-cased.
    """
    district_labels = (
        "Maine 2nd District",
        "Nebraska 2nd District",
        "Maine 1st District",
        "Nebraska 1st District",
        "Nebraska 3rd District",
    )
    if state_str in district_labels:
        return None

    statewide_labels = (
        ("Maine - statewide", "MAINE"),
        ("Nebraska - statewide", "NEBRASKA"),
    )
    for label, canonical in statewide_labels:
        if state_str == label:
            return canonical

    return state_str.upper()

In [9]:
# Add a normalized "state" column. clean_state_names returns None for the Maine/Nebraska
# congressional-district rows, so those rows get a missing state value.
# NOTE(review): no cell visible in this notebook drops those rows afterwards — confirm
# that downstream consumers of the CSV filter them out.
voter_power_index["state"] = voter_power_index["state_raw"].apply(clean_state_names)

In [10]:
# Preview the first rows as a quick sanity check on the scraped values.
voter_power_index.head()

Unnamed: 0,abbrev,power,state_raw,state
0,NH,5.6,New Hampshire,NEW HAMPSHIRE
1,NV,4.9,Nevada,NEVADA
2,NM,4.7,New Mexico,NEW MEXICO
3,MI,3.1,Michigan,MICHIGAN
4,CO,3.1,Colorado,COLORADO


In [11]:
# Write the table to CSV without the DataFrame's row index. `index` is documented as a
# bool, so pass False explicitly rather than relying on None being falsy. The path is
# relative, so it resolves against the kernel's current working directory.
voter_power_index.to_csv("../data/fivethirtyeight/voter-power-index.csv", index=False)

---

---

---