# Download Forecast Data From *FiveThirtyEight*

This notebook downloads *FiveThirtyEight*'s predictions for the House and Senate races as they stand at the moment the notebook is run and writes the resulting CSV files to the `data/fivethirtyeight` directory.

In [1]:
import requests
import pandas as pd
import json

## House Forecast

The data we want is embedded in a JSON file on the *FiveThirtyEight* site that populates the webpage.

In [2]:
# JSON file on the FiveThirtyEight site that holds the House forecast data.
HOUSE_URL = "https://projects.fivethirtyeight.com/2018-midterm-election-forecast/house/home.json"

# Bound the wait (in seconds) so a stalled connection cannot hang the notebook,
# and fail loudly on an HTTP error instead of trying to parse an error page as JSON.
house_response = requests.get(HOUSE_URL, timeout=30)
house_response.raise_for_status()
house_data = house_response.json()

### Extract Candidate Win Probabilities

In [3]:
# One record per (district, candidate) pair; entries whose state is "US" are left out.
house_candidate_records = [
    {
        "state": district["state"],
        "district": district["district"],
        "candidate": forecast["candidate"],
        "party": forecast["party"],
        "classic_prob": forecast["models"]["classic"]["winprob"],
    }
    for district in house_data["districtForecasts"]
    for forecast in district["forecast"]
    if district["state"] != "US"
]

house_candidates = pd.DataFrame(house_candidate_records)
house_candidates.head()

Unnamed: 0,candidate,classic_prob,district,party,state
0,Alyse S. Galvin,28.732,1,D,AK
1,Don Young,71.268,1,R,AK
2,Others,0.0,1,,AK
3,Terri A. Sewell,100.0,7,D,AL
4,Danner Kline,0.016,6,D,AL


In [4]:
# Sanity check: number of candidate rows in the House table.
len(house_candidates)

1031

In [5]:
# Sanity check: number of distinct values in the "state" column.
house_candidates["state"].nunique()

50

In [6]:
# Save the House odds. index=False (the documented boolean form) keeps the
# positional row index out of the CSV.
house_candidates.to_csv(
    "../data/fivethirtyeight/house_candidate_odds.csv",
    index=False,
)

## Senate Forecast

In [7]:
# JSON file on the FiveThirtyEight site that holds the Senate forecast data.
SENATE_URL = "https://projects.fivethirtyeight.com/2018-midterm-election-forecast/senate/home.json"

# Bound the wait (in seconds) so a stalled connection cannot hang the notebook,
# and fail loudly on an HTTP error instead of trying to parse an error page as JSON.
senate_response = requests.get(SENATE_URL, timeout=30)
senate_response.raise_for_status()
senate_data = senate_response.json()

### General forecast

In [8]:
# One record per (seat, candidate) pair; entries whose state is "US" are left out.
senate_candidate_records = [
    {
        "state": seat["state"],
        "class": seat["class"],
        "candidate": forecast["candidate"],
        "party": forecast["party"],
        "classic_prob": forecast["models"]["classic"]["winprob"],
    }
    for seat in senate_data["seatForecasts"]
    for forecast in seat["forecast"]
    if seat["state"] != "US"
]

senate_candidates = pd.DataFrame(senate_candidate_records)

senate_candidates.head()

Unnamed: 0,candidate,class,classic_prob,party,state
0,Kyrsten Sinema,1,61.106,D,AZ
1,Angela Green,1,0.0,G,AZ
2,Martha McSally,1,38.894,R,AZ
3,Dianne Feinstein,1,98.378,D,CA
4,Kevin de Leon,1,1.622,D,CA


In [9]:
# Sanity check: number of candidate rows in the Senate table.
len(senate_candidates)

97

In [10]:
# Sanity check: number of distinct values in the "state" column.
senate_candidates["state"].nunique()

33

In [11]:
# Save the Senate odds. index=False (the documented boolean form) keeps the
# positional row index out of the CSV.
senate_candidates.to_csv(
    "../data/fivethirtyeight/senate_candidate_odds.csv",
    index=False,
)

### Voter Power Index

In [12]:
# Keep only the seats that carry a "vpi" entry and record its "classic" value.
senate_seats = [
    {
        "state": seat["state"],
        "vpi": seat["vpi"]["classic"],
        "class": seat["class"],
    }
    for seat in senate_data["seatForecasts"]
    if "vpi" in seat
]

voter_power_index = pd.DataFrame(senate_seats)

voter_power_index.head()

Unnamed: 0,class,state,vpi
0,1,AZ,3.865268
1,1,CA,0.0
2,1,CT,0.056185
3,1,DE,0.052618
4,1,FL,1.007093


In [13]:
# Sanity check: number of rows in the voter power index table.
len(voter_power_index)

35

In [14]:
# Number of distinct values in the "state" column, for comparison with the row count.
voter_power_index["state"].nunique()

33

In [15]:
# Rows per state, most frequent first; only the top five are shown.
voter_power_index["state"].value_counts().head(5)

MS    2
MN    2
WV    1
FL    1
WA    1
Name: state, dtype: int64

In [16]:
# Collapse to one row per state by adding up the "vpi" values of that state's rows.
vpi_by_state = voter_power_index.groupby("state")["vpi"]
total_voter_power_index = vpi_by_state.sum().to_frame()

total_voter_power_index.head()

Unnamed: 0_level_0,vpi
state,Unnamed: 1_level_1
AZ,3.865268
CA,0.0
CT,0.056185
DE,0.052618
FL,1.007093


In [17]:
# Save the per-state totals; no index argument is passed, so the "state" index is written too.
total_voter_power_index.to_csv("../data/fivethirtyeight/senate-voter-power-index.csv")

---

---

---