# Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import io
import datetime

from election_forecasting import *

# Get Polling Data

In [2]:
# Load the presidential polls CSV from the local datasets folder
polls = pd.read_csv(filepath_or_buffer="datasets/president_polls.csv")

In [3]:
# Download the FiveThirtyEight pollster ratings CSV and parse it into a DataFrame
url = "https://raw.githubusercontent.com/fivethirtyeight/data/master/pollster-ratings/pollster-ratings.csv"
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() keeps an HTTP error page from being parsed as if it were CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()
res = response.content  # raw bytes of the downloaded CSV
pollster_rating = pd.read_csv(io.StringIO(res.decode('utf-8')))

In [4]:
# Build the example matchup (Donald Trump vs. Elizabeth Warren) from the raw
# polls and the pollster ratings.
# NOTE(review): this overwrites `polls`, so re-running the cell passes the
# already-processed frame back into get_matchup; re-run the loading cell first.
polls = get_matchup(
    polls,
    pollster_rating,
    "Donald Trump",
    "Elizabeth Warren",
    "rep",
    "dem",
    level="all",
)

## Weight for Recency

In [5]:
# Create integer columns with the month and year of each poll's end date.
# The dates are parsed once and read through the .dt accessor, so the new
# columns share the index of `polls` and stay aligned with its rows whatever
# the index labels are (row lookups by 0..n-1 labels and assignment of a
# freshly built pd.Series only line up on a default RangeIndex).
end_dates = pd.to_datetime(polls["end_date"])

polls["month"] = end_dates.dt.month.astype("int64")
polls["year"] = end_dates.dt.year.astype("int64")

In [6]:
# Get the month and year of the most recent poll as [month, year].
# The latest poll is located by value (largest year, then month) instead of
# assuming it sits at row label 0, so the result does not depend on how
# `polls` happens to be sorted or indexed.
latest_pos = (polls["year"] * 12 + polls["month"]).to_numpy().argmax()
latest_poll = [polls["month"].iloc[latest_pos], polls["year"].iloc[latest_pos]]

In [7]:
# Convert months and year into distance in months from the most recent poll.
# latest_poll is [month, year]; the subtraction is done on whole columns so the
# result keeps the index of `polls` and stays aligned with its rows even when
# the index is not the default 0..n-1 range.
latest_month, latest_year = latest_poll
months_back = latest_month - polls["month"]
years_back = latest_year - polls["year"]

polls["month_diff"] = months_back + (12 * years_back)

In [8]:
# Recency weight: the reciprocal of (months behind the latest poll + 1),
# so a poll with month_diff 0 gets weight 1 and older polls get less.
recency_denominator = polls["month_diff"] + 1
polls["recency_weight"] = 1 / recency_denominator

## Weight for Pollster Rating

In [9]:
# Pollster-quality weight from the FiveThirtyEight "Predictive Plus-Minus" column:
# the reciprocal of (rating + 2.0).
# NOTE(review): presumably the 2.0 offset is there to keep the denominator
# positive -- confirm that no rating is at or below -2.
shifted_rating = polls["Predictive Plus-Minus"] + 2.0
polls["pollster_weight"] = 1 / shifted_rating

In [10]:
# Combine the two weights: the overall poll weight is the product of the
# recency weight and the pollster-quality weight.
polls["poll_weight"] = polls["recency_weight"].mul(polls["pollster_weight"])

In [11]:
# Scale each side's vote count by the poll's combined weight
combined_weight = polls["poll_weight"]
polls["weighted_votes_dem"] = polls["votes_dem"] * combined_weight
polls["weighted_votes_rep"] = polls["votes_rep"] * combined_weight

In [12]:
# Display the polls table with the new weighting columns
# (the rendered output elides the middle rows and columns with "...").
polls

Unnamed: 0,question_id,poll_id,stage,office_type,pollster,state,end_date,candidate_name_rep,candidate_party_rep,sample_size,...,votes_dem,total_decided,month,year,month_diff,recency_weight,pollster_weight,poll_weight,weighted_votes_dem,weighted_votes_rep
0,113994,62978,general,U.S. President,Emerson College,New Hampshire,2019-11-26,Donald Trump,REP,547,...,266,546,11,2019,0,1.000000,0.666667,0.666667,177.333333,186.666667
1,113999,62978,general,U.S. President,Emerson College,New Hampshire,2019-11-26,Donald Trump,REP,637,...,301,574,11,2019,0,1.000000,0.666667,0.666667,200.666667,182.000000
2,114109,62980,general,U.S. President,SurveyUSA,California,2019-11-22,Donald Trump,REP,2039,...,1141,1854,11,2019,0,1.000000,0.833333,0.833333,950.833333,594.166667
3,113252,62668,general,U.S. President,SurveyUSA,Georgia,2019-11-18,Donald Trump,REP,1303,...,599,1185,11,2019,0,1.000000,0.833333,0.833333,499.166667,488.333333
4,113346,62674,general,U.S. President,Marquette University Law School,Wisconsin,2019-11-17,Donald Trump,REP,801,...,344,728,11,2019,0,1.000000,1.000000,1.000000,344.000000,384.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,92980,57445,general,U.S. President,Emerson College,Iowa,2019-02-02,Donald Trump,REP,831,...,333,736,2,2019,9,0.100000,0.666667,0.066667,22.200000,26.866667
103,92929,57427,general,U.S. President,Glengariff Group Inc.,Michigan,2019-01-26,Donald Trump,REP,600,...,274,529,1,2019,10,0.090909,0.384615,0.034965,9.580420,8.916084
104,92872,57407,general,U.S. President,Praecones Analytica,New Hampshire,2019-01-21,Donald Trump,REP,593,...,317,562,1,2019,10,0.090909,0.344828,0.031348,9.937304,7.680251
105,92602,57275,general,U.S. President,Public Policy Polling,North Carolina,2019-01-07,Donald Trump,REP,750,...,345,690,1,2019,10,0.090909,0.454545,0.041322,14.256198,14.256198
