In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split, KFold, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error
import xgboost
from scipy.stats import loguniform, randint, t


# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.set_option("mode.chained_assignment", None)

# Survey modes the notebook looks for inside each poll's `methodology` string.
methodologies = [
    "Live Phone",
    "IVR",
    "Online",
    "Text",
    "Mail",
    "Probability Panel",
    "Text-to-Web",
]


# Functions to Calculate Pollster Rating
This project works in the following way:
1. For every year being rated, look at all polls from the preceding `years_to_rate` years (currently 10)
2. Create a prediction algorithm from all non-pollster values (office type, methodology, partisanship, sample size, days before election) and use them to predict the error via XGBoost
3. Then, get the best model's predicted (expected) error for each poll, and place it back into the original dataset
4. Calculate how much better each pollster is than what we'd expect from that pollster (expected error minus actual error), given the other data points we have

In [13]:
def get_best_estimator(X, y, random_state=42):
    """Tune an XGBoost regressor that predicts poll error from non-pollster features.

    The rows are split 80/20. A randomized hyperparameter search (75 candidates,
    4-fold CV, scored by negative mean squared error) is run on the 80% split,
    the mean absolute error on the 20% hold-out is printed as a sanity check,
    and the best pipeline found by the search is returned.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame. Must contain 'office_type', 'partisan', 'sample_size',
        'days_before_election' and one column per entry of the module-level
        ``methodologies`` list. Columns not named here are ignored by the
        preprocessor (ColumnTransformer drops the remainder by default).
    y : array-like
        Target values (the poll error), aligned row-for-row with ``X``.
    random_state : int, default 42
        Seed used for both the train/test split and the hyperparameter
        sampling, so repeated runs give the same search.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Preprocessor + XGBRegressor with the best hyperparameters found,
        refit on the training split by RandomizedSearchCV.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.8, random_state=random_state
    )

    one_hot_fts = ['office_type', 'partisan']
    std_fts = ['sample_size', 'days_before_election'] + methodologies
    preprocessor = ColumnTransformer([
        # handle_unknown='ignore': a category that only shows up in a validation
        # fold or in the hold-out rows is encoded as all zeros instead of raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), one_hot_fts),
        ('std', StandardScaler(), std_fts),
    ])

    cv = KFold(n_splits=4)
    xgb = xgboost.XGBRegressor()
    # make_pipeline names each step after its lowercased class name; the search
    # parameters must be prefixed with "<step name>__".
    model_name = xgb.__class__.__name__

    param_dict = {
        'n_estimators': randint(10, 200),
        'max_depth': randint(2, 12),
        'eta': loguniform(0.001, 1),
        'reg_alpha': loguniform(0.01, 100),
        'reg_lambda': loguniform(0.01, 100),
    }
    param_dict = {f"{model_name.lower()}__{key}": value for key, value in param_dict.items()}

    pipe = make_pipeline(preprocessor, xgb)

    grid = RandomizedSearchCV(
        pipe,
        param_distributions=param_dict,
        n_iter=75,
        scoring='neg_mean_squared_error',
        cv=cv,
        verbose=1,
        # Without a seed the sampled candidates (and thus the ratings) change on every run.
        random_state=random_state,
    )
    grid.fit(X_train, y_train)

    # Debugging aid only: how well the tuned model does on the held-out 20%.
    test_score = mean_absolute_error(y_test, grid.predict(X_test))
    print(f"Test MAE is {test_score}")

    return grid.best_estimator_
    

In [19]:
#HYPERPARAMETERS:
# Length, in years, of the look-back window: plus_minus_year only uses polls whose
# `year` falls in [before_year - years_to_rate, before_year).
years_to_rate: int = 10

def conf_width(errors):
    """Return the margin of a one-sided 95% Student's-t confidence bound on the mean.

    Parameters
    ----------
    errors : sequence or pandas.Series of float
        Observations whose mean is being bounded.

    Returns
    -------
    float
        ``t(0.95, n-1) * s / sqrt(n)`` where ``s`` is the sample standard
        deviation, or ``np.inf`` when there are fewer than two observations
        (the spread cannot be estimated, so the bound is uninformative).
    """
    n = len(errors)
    if n < 2:
        return np.inf
    # ddof=1: the t-interval with n-1 degrees of freedom is defined with the
    # Bessel-corrected sample standard deviation, not the population one.
    return t.ppf(0.95, n - 1) * np.std(errors, ddof=1) / np.sqrt(n)

def plus_minus_year(before_year, pre_filtered_data, min_polls=10):
    """Rate every pollster as of `before_year`, using only polls from the preceding window.

    Polls with ``before_year - years_to_rate <= year < before_year`` are used to
    fit an expected-error model (see get_best_estimator) on non-pollster
    features. Each poll's error differential is ``expected_error - error``
    (positive = more accurate than expected), and the differentials are
    summarised per pollster.

    Parameters
    ----------
    before_year : int
        Year being rated; only polls from strictly earlier years are used.
    pre_filtered_data : pandas.DataFrame
        Poll-level data with at least the columns 'year', 'pollster_rating_id',
        'office_type', 'methodology', 'partisan', 'sample_size',
        'days_before_election', 'error' and 'bias'. Not modified.
    min_polls : int, default 10
        Minimum number of polls in the window for a pollster to be flagged
        as ``valid``.

    Returns
    -------
    pandas.DataFrame
        One row per pollster with columns 'year', 'pollster_rating_id',
        'lower_error_diff' (mean differential minus its conf_width margin),
        'mean_bias', 'count' and 'valid'.
    """
    # Keep only polls inside the look-back window. Copy explicitly so the columns
    # added below never depend on (or write through to) the caller's frame.
    in_window = (
        (pre_filtered_data['year'] < before_year)
        & (pre_filtered_data['year'] >= before_year - years_to_rate)
    )
    previous_years = pre_filtered_data.loc[in_window, :].copy()

    feature_cols = ['office_type', 'methodology', 'partisan', 'sample_size', 'days_before_election', 'error']
    filtered_data = previous_years.loc[:, feature_cols].copy()

    # One indicator column per survey mode, set when the mode's name occurs in the
    # poll's methodology string.
    # NOTE(review): substring matching means "Text" is also flagged for
    # "Text-to-Web" polls - confirm whether that overlap is intended.
    for method in methodologies:
        filtered_data[method] = filtered_data['methodology'].str.contains(method)

    # The raw methodology string is replaced by the indicators above; the original
    # code called drop() here but discarded the result.
    X = filtered_data.drop(columns=['methodology', 'error'])
    y = filtered_data['error']

    # Expected error per poll from the tuned model, then how much each poll beat it.
    estimator = get_best_estimator(X, y)
    previous_years['expected_error'] = estimator.predict(X)
    previous_years['error_differential'] = previous_years['expected_error'] - previous_years['error']

    pollster_error_differential = previous_years.groupby(["pollster_rating_id"], as_index=False).agg(
        {'error_differential': [conf_width, 'mean', 'count'], 'bias': 'mean'}
    )

    # Flatten the two-level column index; the order matches the agg spec above.
    pollster_error_differential.columns = [
        "pollster_rating_id", "error_differential_conf", "error_differential_mean", 'count', 'mean_bias'
    ]
    # Lower bound of the error differential: mean minus the confidence margin.
    pollster_error_differential['lower_error_diff'] = (
        pollster_error_differential["error_differential_mean"]
        - pollster_error_differential["error_differential_conf"]
    )
    # A pollster is "valid" once it has enough polls in the window.
    pollster_error_differential['valid'] = pollster_error_differential['count'] >= min_polls
    pollster_error_differential['year'] = before_year

    results = pollster_error_differential.loc[
        :, ['year', 'pollster_rating_id', "lower_error_diff", "mean_bias", "count", "valid"]
    ]

    return results

**Running Code**

In [20]:
# Load the cleaned historical polls.
past_polls = pd.read_csv('../../cleaned_data/Historical Polls.csv')

# Rate pollsters as of every even year from 2002 through 2024 and stack the
# per-year results into a single frame.
full_pollster_ratings = pd.concat([
    plus_minus_year(rating_year, past_polls)
    for rating_year in range(2002, 2025, 2)
])

Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.1174580691027085
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.0205798946596545
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.04354519490235
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.1500743993063955
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 2.932063278732641
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.0282062416076667
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 2.952465621375303
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.0505516587144066
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.0950932929503856
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3.099793147868101
Fitting 4 folds for each of 75 candidates, totalling 300 fits
Test MAE is 3

**Ranking pollsters within each year, separately for valid and not-valid pollsters, and then publishing the full data!**

In [21]:
# Rank pollsters inside each (year, valid) bucket: the highest lower_error_diff
# gets rank 1, and ties share the smallest rank.
full_pollster_ratings['rank'] = (
    full_pollster_ratings
    .groupby(['year', 'valid'])['lower_error_diff']
    .rank(method='min', ascending=False)
)

# Write the ratings out; to_csv also writes the frame's index as the first column.
full_pollster_ratings.to_csv("../../cleaned_data/Pollster Ratings.csv")