# Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import io
import datetime

from election_forecasting import *

# Get Polling Data

In [7]:
# Download the current president polls CSV from FiveThirtyEight's polls page
polls_csv_url = "https://projects.fivethirtyeight.com/polls-page/president_polls.csv"
polls = pd.read_csv(polls_csv_url)

In [3]:
# Build the pollster ratings DataFrame from FiveThirtyEight's public data repo
url = "https://raw.githubusercontent.com/fivethirtyeight/data/master/pollster-ratings/pollster-ratings.csv"

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Raw bytes of the CSV; decoded explicitly as UTF-8 before parsing
res = response.content
pollster_rating = pd.read_csv(io.StringIO(res.decode('utf-8')))

In [9]:
# Build the example matchup frame (Trump vs. Biden) with get_matchup.
# NOTE: this rebinds `polls` to the result, so re-running the cell feeds the
# already-processed frame back into get_matchup instead of the raw download.
polls = get_matchup(
    polls,
    pollster_rating,
    "Donald Trump",
    "Joseph R. Biden Jr.",
    "rep",
    "dem",
    level="all",
)

## Weight for Recency

In [10]:
# Create columns with the month and year of each poll's end date.
# Parsing the dates once and using the .dt accessor replaces two Python loops
# that sliced the string form of each date. The result stays aligned with
# polls' own index, so it does not rely on the index being exactly 0..n-1.
end_dates = pd.to_datetime(polls["end_date"])

# Cast to int64 so the columns keep the dtype the list-based version produced
polls["month"] = end_dates.dt.month.astype("int64")
polls["year"] = end_dates.dt.year.astype("int64")

In [11]:
# Get the month and year of the most recent poll.
# Pick the row with the latest end_date instead of assuming the row labelled 0
# is the newest. If a later poll sat further down the frame, month_diff would
# go negative and the recency weight 1 / (month_diff + 1) could divide by zero.
# idxmax returns the first row on ties, so a frame already sorted newest-first
# still yields its first row.
latest_idx = pd.to_datetime(polls["end_date"]).idxmax()
latest_poll = [polls.loc[latest_idx, "month"], polls.loc[latest_idx, "year"]]

In [12]:
# Convert month and year into a distance in months from the most recent poll.
# latest_poll is [month, year]; the column arithmetic is index-aligned, so it
# does not depend on the frame having a 0..n-1 index the way a row loop does.
polls["month_diff"] = (latest_poll[0] - polls["month"]) + 12 * (latest_poll[1] - polls["year"])

In [13]:
# Recency weight: polls from the latest month (month_diff == 0) get 1.0,
# and the weight shrinks as 1 / (months back + 1) for older polls
polls["recency_weight"] = 1 / (1 + polls["month_diff"])

## Weight for Pollster Rating

In [18]:
# Pollster-quality weight from FiveThirtyEight's "Predictive Plus-Minus" rating:
# a lower plus-minus gives a larger weight.
# NOTE(review): the 2.0 offset presumably keeps the denominator positive —
# confirm no rating is <= -2.
polls["pollster_weight"] = 1 / (2.0 + polls["Predictive    Plus-Minus"])

In [19]:
# Combined weight per poll: the product of the recency and pollster-quality weights
polls["poll_weight"] = polls["recency_weight"] * polls["pollster_weight"]

In [20]:
# Scale each side's vote count by the poll's combined weight
polls["weighted_votes_dem"] = polls["votes_dem"] * polls["poll_weight"]
polls["weighted_votes_rep"] = polls["votes_rep"] * polls["poll_weight"]

In [21]:
# Display the frame, now carrying the month/recency, pollster and weighted-vote columns
polls

Unnamed: 0,question_id,poll_id,stage,office_type,pollster,state,end_date,candidate_name_rep,candidate_party_rep,sample_size,...,votes_dem,total_decided,month,year,month_diff,recency_weight,pollster_weight,poll_weight,weighted_votes_dem,weighted_votes_rep
0,127936,68308,general,U.S. President,Landmark Communications,Georgia,2020-08-31,Donald Trump,REP,500,...,202,441,8,2020,0,1.000000,0.500000,0.500000,101.000000,119.500000
1,127932,68307,general,U.S. President,East Carolina University,North Carolina,2020-08-30,Donald Trump,REP,1101,...,513,1049,8,2020,0,1.000000,0.370370,0.370370,190.000000,198.518519
2,127908,68287,general,U.S. President,Morning Consult,Arizona,2020-08-30,Donald Trump,REP,943,...,490,886,8,2020,0,1.000000,0.333333,0.333333,163.333333,132.000000
3,127909,68288,general,U.S. President,Morning Consult,Georgia,2020-08-30,Donald Trump,REP,1392,...,682,1322,8,2020,0,1.000000,0.333333,0.333333,227.333333,213.333333
4,127910,68289,general,U.S. President,Morning Consult,Michigan,2020-08-30,Donald Trump,REP,1424,...,740,1338,8,2020,0,1.000000,0.333333,0.333333,246.666667,199.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
677,93202,57543,general,U.S. President,Harper Polling,North Carolina,2019-02-13,Donald Trump,REP,500,...,195,410,2,2019,18,0.052632,0.476190,0.025063,4.887218,5.388471
678,92974,57445,general,U.S. President,Emerson College,Iowa,2019-02-02,Donald Trump,REP,831,...,420,830,2,2019,18,0.052632,0.625000,0.032895,13.815789,13.486842
679,92926,57427,general,U.S. President,Glengariff Group,Michigan,2019-01-26,Donald Trump,REP,600,...,319,560,1,2019,19,0.050000,0.333333,0.016667,5.316667,4.016667
680,92597,57275,general,U.S. President,Public Policy Polling,North Carolina,2019-01-07,Donald Trump,REP,750,...,367,697,1,2019,19,0.050000,0.476190,0.023810,8.738095,7.857143
