In [1]:
import pandas as pd
import numpy as np

from warnings import filterwarnings
filterwarnings('ignore')



import os

if os.path.exists('./delegates_preds.csv'):
     os.remove('./delegates_preds.csv')
if os.path.exists('./error_rates.csv'):
     os.remove('./error_rates.csv')    
if os.path.exists('./election_preds.csv'):
     os.remove('./election_preds.csv')

# Data Loading & Split

In [2]:
from sklearn.preprocessing import StandardScaler

data_h = pd.read_csv('./2016_2020.csv')
data_24 = pd.read_csv('./2024.csv')

common_df = pd.concat([data_h, data_24], axis=0, ignore_index=True)
common_df['delta'] = common_df['real_spread'] - common_df['spread']
common_df = pd.get_dummies(common_df, columns=['respondent_type', '3_scale_bias', '5_scale_bias', 'state'])
                                   
scaler = StandardScaler()

common_df[['sample_size', 'moe',
          'day_start', 'day_end',
          'spread']] = scaler.fit_transform(common_df[['sample_size', 'moe', 'day_start', 'day_end', 'spread']])


common_df[[
     'state_AZ', 'state_GA', 'state_MI', 'state_NC', 'state_NV', 'state_PA', 'state_WI',
     '3_scale_bias_independent', '3_scale_bias_pro-democratic', '3_scale_bias_pro-republican',
     'sample_size', 'moe', 
     'respondent_type_LV', 'respondent_type_RV',
     'spread',
     # 'day_start', 'day_end'
]]

Unnamed: 0,state_AZ,state_GA,state_MI,state_NC,state_NV,state_PA,state_WI,3_scale_bias_independent,3_scale_bias_pro-democratic,3_scale_bias_pro-republican,sample_size,moe,respondent_type_LV,respondent_type_RV,spread
0,True,False,False,False,False,False,False,False,True,False,-0.487564,0.156491,True,False,1.477126
1,True,False,False,False,False,False,False,False,True,False,-0.335343,-0.139645,True,False,1.477126
2,True,False,False,False,False,False,False,True,False,False,-0.545408,0.008423,True,False,1.194236
3,True,False,False,False,False,False,False,False,True,False,0.349653,1.044897,True,False,0.628456
4,False,True,False,False,False,False,False,False,False,True,0.976805,-1.176118,True,False,0.911346
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,False,False,False,False,False,False,True,False,False,False,-0.213566,-0.287712,True,False,0.062676
163,False,False,False,False,False,False,True,False,True,False,-0.435809,1.785235,True,False,-1.634664
164,False,False,False,False,False,False,True,True,False,False,-1.154293,1.192964,True,False,0.345566
165,False,False,False,False,False,False,True,True,False,False,-0.384054,1.192964,True,False,-0.220214


In [3]:
from sklearn.model_selection import train_test_split


X_train, X_val, y_train, y_val = train_test_split(common_df[common_df['year'] != 2024][[
     'state_AZ', 'state_GA', 'state_MI', 'state_NC', 'state_NV', 'state_PA', 'state_WI',
     '3_scale_bias_independent', '3_scale_bias_pro-democratic', '3_scale_bias_pro-republican',
     'sample_size', 'moe', 
     'respondent_type_LV', 'respondent_type_RV',
     'spread', 
     'day_start', 'day_end'
]], common_df[common_df['year'] != 2024]['delta'], test_size=0.2, random_state=2024)

X_test = common_df[common_df['year'] == 2024][[
     'state_AZ', 'state_GA', 'state_MI', 'state_NC', 'state_NV', 'state_PA', 'state_WI',
     '3_scale_bias_independent', '3_scale_bias_pro-democratic', '3_scale_bias_pro-republican',
     'sample_size', 'moe', 
     'respondent_type_LV', 'respondent_type_RV',
     'spread', 
     'day_start', 'day_end'
]]

# Linear models

In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_absolute_error
import numpy as np
from tabulate import tabulate

def train_and_evaluate_model(model, X_train, y_train, X_val, y_val):
    """Trains a model and evaluates its performance on the validation set."""
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)

    mae = mean_absolute_error(y_val, y_pred)
    mse = np.mean((y_val - y_pred) ** 2)
    rmse = np.sqrt(mse)

    # append to csv file
    return model.__class__.__name__, round(mae, 3), round(mse, 3), round(rmse, 3)


lr_model = LinearRegression()
lasso_model = Lasso(alpha=0.1, max_iter=100000, selection='random', tol=0.01)
ridge_model = Ridge(alpha=1, max_iter=1000, solver='sparse_cg', tol=0.0001)

res_table = []
for model in [lr_model, lasso_model, ridge_model]:
    model_name, mae, mse, rmse = train_and_evaluate_model(model, X_train, y_train, X_val, y_val)
    res_table.append([model_name, mae, mse, rmse])
   
pd.DataFrame(res_table, columns=['Model', 'MAE', 'MSE', 'RMSE']).to_csv('error_rates.csv', index=False, mode='a')

print(tabulate(res_table, headers=['Model', 'MAE', 'MSE', 'RMSE'], tablefmt='fancy_grid', numalign='left'))

╒══════════════════╤═══════╤═══════╤════════╕
│ Model            │ MAE   │ MSE   │ RMSE   │
╞══════════════════╪═══════╪═══════╪════════╡
│ LinearRegression │ 1.037 │ 1.688 │ 1.299  │
├──────────────────┼───────┼───────┼────────┤
│ Lasso            │ 1.233 │ 2.413 │ 1.553  │
├──────────────────┼───────┼───────┼────────┤
│ Ridge            │ 1.037 │ 1.678 │ 1.295  │
╘══════════════════╧═══════╧═══════╧════════╛


In [5]:
import numpy as np
import pandas as pd
from tabulate import tabulate

def predict_swing_states(model):
    data_24_copy = pd.read_csv('./2024.csv')

    model_name = model.__class__.__name__
    data_24_copy['dt_preds'] = model.predict(X_test) + data_24_copy['spread']

    delegates = {
        'AZ': 11,
        'NV': 6,
        'WI': 10,
        'MI': 15,
        'PA': 19,
        'NC': 16,
        'GA': 16
    }

    delegates = pd.DataFrame.from_dict(delegates, orient='index', columns=['Delegates'])
    delegates.index.name = 'State'
    delegates.reset_index(inplace=True)

    # Define the bootstrap function
    def bootstrap(data, n=1000):
        means = []
        medians = []
        for _ in range(n):
            sample = np.random.choice(data, 100, replace=True)
            means.append(sample.mean())
            medians.append(np.median(sample))
        return np.mean(means),  np.median(medians), np.percentile(means, 5), np.percentile(means, 95)

    # Initialize lists to store results
    means = []
    medians = []
    ci = []

    # Perform bootstrap sampling for each state
    for state in delegates['State']:
        mean, median, lower, upper = bootstrap(data_24_copy[data_24_copy['state'] == state]['dt_preds'])

        means.append(round(mean, 3))
        medians.append(round(median, 3))

        ci.append(f'({round(lower, 3)}, {round(upper, 3)})')

    # Create a new DataFrame for results
    results = pd.DataFrame({
        'State': delegates['State'],
        'Model': [model_name] * len(delegates),
        'Mean': means,
        'Median': medians,
        'CI': ci
    })

    return results

# Initialize the DataFrame for predictions
delegates_preds = pd.DataFrame(columns=['State', 'Model', 'Mean', 'Median', 'CI'])
delegates_num_map = {
    'AZ': 11,
    'NV': 6,
    'WI': 10,
    'MI': 15,
    'PA': 19,
    'NC': 16,
    'GA': 16
}
# Predict and append results
for model in [lr_model, lasso_model, ridge_model]:
    results = predict_swing_states(model)
    delegates_preds = pd.concat([delegates_preds, results], ignore_index=True)

delegates_preds['Delegates'] = delegates_preds['State'].map(delegates_num_map)

delegates_preds = delegates_preds[['State', 'Delegates', 'Model', 'Mean', 'Median', 'CI']]
delegates_preds.to_csv('delegates_preds.csv', index=False, mode='a')

print(tabulate(delegates_preds[delegates_preds['Model'] == 'LinearRegression'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤══════════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model            │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪══════════════════╪════════╪══════════╪══════════════════╡
│ 0  │ AZ      │ 11          │ LinearRegression │ 0.894  │ 0.714    │ (0.738, 1.054)   │
├────┼─────────┼─────────────┼──────────────────┼────────┼──────────┼──────────────────┤
│ 1  │ NV      │ 6           │ LinearRegression │ -2.831 │ -3.11    │ (-2.981, -2.673) │
├────┼─────────┼─────────────┼──────────────────┼────────┼──────────┼──────────────────┤
│ 2  │ WI      │ 10          │ LinearRegression │ 1.032  │ 0.984    │ (0.923, 1.141)   │
├────┼─────────┼─────────────┼──────────────────┼────────┼──────────┼──────────────────┤
│ 3  │ MI      │ 15          │ LinearRegression │ -0.531 │ -0.643   │ (-0.689, -0.373) │
├────┼─────────┼─────────────┼──────────────────┼────────┼──────────┼──────────────────┤
│ 4  │ PA      │ 19  

In [6]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'Lasso'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model   │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═════════╪════════╪══════════╪══════════════════╡
│ 7  │ AZ      │ 11          │ Lasso   │ 1.446  │ 1.216    │ (1.301, 1.596)   │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 8  │ NV      │ 6           │ Lasso   │ -1.574 │ -1.857   │ (-1.711, -1.426) │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 9  │ WI      │ 10          │ Lasso   │ 0.746  │ 0.854    │ (0.644, 0.845)   │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 10 │ MI      │ 15          │ Lasso   │ 0.459  │ 0.407    │ (0.306, 0.597)   │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 11 │ PA      │ 19          │ Lasso   │ 0.986  │ 0.875    │ (0.844, 1.136)   │
├────┼─────────┼─────────────┼─────────┼

In [7]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'Ridge'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model   │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═════════╪════════╪══════════╪══════════════════╡
│ 14 │ AZ      │ 11          │ Ridge   │ 0.912  │ 0.777    │ (0.734, 1.078)   │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 15 │ NV      │ 6           │ Ridge   │ -2.331 │ -2.47    │ (-2.493, -2.162) │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 16 │ WI      │ 10          │ Ridge   │ 1.081  │ 0.997    │ (0.975, 1.195)   │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 17 │ MI      │ 15          │ Ridge   │ -0.397 │ -0.485   │ (-0.55, -0.239)  │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼──────────────────┤
│ 18 │ PA      │ 19          │ Ridge   │ 0.567  │ 0.475    │ (0.397, 0.752)   │
├────┼─────────┼─────────────┼─────────┼

In [8]:
def predict_election(delegates_data, model):
    REP_no_swings = 219
    DEM_no_swings = 226

    REP_delegates = delegates_data.loc[
        (delegates_data['Model'] == model.__class__.__name__) & (delegates_data['Mean'] > 0), 'Delegates'
    ].sum() + REP_no_swings

    DEM_delegates = delegates_data.loc[
        (delegates_data['Model'] == model.__class__.__name__) & (delegates_data['Mean'] < 0), 'Delegates'
    ].sum() + DEM_no_swings
          
    return REP_delegates, DEM_delegates

# Add to DataFrame and print with tabulate
res = pd.DataFrame(columns=['Model', 'REP', 'DEM'])

for model in [lr_model, lasso_model, ridge_model]:
    REP_delegates, DEM_delegates = predict_election(delegates_preds, model)
    res = pd.concat([res, pd.DataFrame([[model.__class__.__name__, REP_delegates, DEM_delegates]], columns=['Model', 'REP', 'DEM'])], ignore_index=True)

res.to_csv('election_preds.csv', index=False, mode='a')

print(tabulate(res, headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤══════════════════╤═══════╤═══════╕
│    │ Model            │ REP   │ DEM   │
╞════╪══════════════════╪═══════╪═══════╡
│ 0  │ LinearRegression │ 291   │ 247   │
├────┼──────────────────┼───────┼───────┤
│ 1  │ Lasso            │ 306   │ 232   │
├────┼──────────────────┼───────┼───────┤
│ 2  │ Ridge            │ 291   │ 247   │
╘════╧══════════════════╧═══════╧═══════╛


In [9]:
import plotly.express as px
import os


def plot_election_results(delegates_data, model):
     swing_states = ['AZ', 'GA', 'MI', 'NV', 'PA', 'WI']
     REP_states = ['FL', 'TX', 'OH', 'IA', 'NC', 'GA', 'AZ']
     DEM_states = ['CA', 'NY', 'IL', 'NJ', 'WA', 'MA', 'MD']
     
     
     states_data = pd.DataFrame({
          'state': swing_states,
          'color': ['blue' if delegates_data[(delegates_data['Model'] == model.__class__.__name__) &
                                              (delegates_data['State'] == state)]['Mean'].values[0] < 0 else 'red' for state in swing_states]
     })

     red_states = pd.DataFrame({
          'state' : ['FL', 'TX', 'OH', 'IA', 'IN', 'MO', 'MT', 'NC', 'SC', 'UT', 'WY', 'AL', 'AK', 'AR', 'ID', 'KS', 'KY', 'LA', 'MS', 'NE', 'ND', 'OK', 'SD', 'TN', 'WV'],
          'color' : ['red' for _ in range(25)]
     })

     blue_states = pd.DataFrame({
          'state' : ['CA', 'NY', 'IL', 'WA', 'OR', 'MA', 'NJ', 'CT', 'RI', 'DC', 'DE', 'HI', 'MD', 'VT', 'ME', 'NH', 'NM', 'MN', 'CO', 'VA'],
          'color' : ['blue' for _ in range(20)]
     })
                     
     states_data = pd.concat([states_data, red_states])
     states_data = pd.concat([states_data, blue_states])
     
     # make title font larger
     fig = px.choropleth(
          states_data,
          locations='state',
          locationmode="USA-states",
          color='color',
          color_discrete_map={"blue": "blue", "red": "red"},
          scope="usa",
          title=f"""{model.__class__.__name__}. REP Delegates: {res[res['Model'] == model.__class__.__name__]['REP'].values[0]}; DEM Delegates: {res[res['Model'] == model.__class__.__name__]['DEM'].values[0]}"""
     )

     fig.update_layout(
          title_font_size=32
     )
     
     fig.update_layout(
          geo=dict(
               lakecolor='rgb(255, 255, 255)',
               showlakes=True,
               projection_type='albers usa'
          )
     )

     fig.update_layout(showlegend=False)    

     return fig
     
for model in [lr_model, lasso_model, ridge_model]:
     fig = plot_election_results(delegates_preds, model)
     
     os.makedirs('./images/png', exist_ok=True)
     fig.write_image(f'./images/png/{model.__class__.__name__}.png', width=1200, height=800)

     print(f"Saved {model.__class__.__name__} plot to ./images/png/{model.__class__.__name__}.png")

Saved LinearRegression plot to ./images/png/LinearRegression.png
Saved Lasso plot to ./images/png/Lasso.png
Saved Ridge plot to ./images/png/Ridge.png


# Trees

In [10]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

dt_model = DecisionTreeRegressor()
et_model = ExtraTreesRegressor()
rf_model = RandomForestRegressor()

res_table = []

for model in [dt_model, et_model, rf_model]:
     model_name, mae, mse, rmse = train_and_evaluate_model(model, X_train, y_train, X_val, y_val)
     res_table.append([model_name, mae, mse, rmse])

pd.DataFrame(res_table, columns=['Model', 'MAE', 'MSE', 'RMSE']).to_csv('error_rates.csv', index=False, mode='a', header=False)

print(tabulate(res_table, headers=['Model', 'MAE', 'MSE', 'RMSE'], tablefmt='fancy_grid', numalign='left'))

╒═══════════════════════╤═══════╤═══════╤════════╕
│ Model                 │ MAE   │ MSE   │ RMSE   │
╞═══════════════════════╪═══════╪═══════╪════════╡
│ DecisionTreeRegressor │ 2.231 │ 8.491 │ 2.914  │
├───────────────────────┼───────┼───────┼────────┤
│ ExtraTreesRegressor   │ 1.197 │ 2.306 │ 1.519  │
├───────────────────────┼───────┼───────┼────────┤
│ RandomForestRegressor │ 1.474 │ 3.309 │ 1.819  │
╘═══════════════════════╧═══════╧═══════╧════════╛


In [11]:
delegates_preds = pd.DataFrame(columns=['State', 'Model', 'Mean', 'Median', 'CI'])

for model in [dt_model, et_model, rf_model]:
    results = predict_swing_states(model)
    delegates_preds = pd.concat([delegates_preds, results], ignore_index=True)

delegates_preds['Delegates'] = delegates_preds['State'].map(delegates_num_map)

delegates_preds = delegates_preds[['State', 'Delegates', 'Model', 'Mean', 'Median', 'CI']]

delegates_preds.to_csv('delegates_preds.csv', index=False, mode='a', header=False)

print(tabulate(delegates_preds[delegates_preds['Model'] == 'DecisionTreeRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═══════════════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model                 │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═══════════════════════╪════════╪══════════╪══════════════════╡
│ 0  │ AZ      │ 11          │ DecisionTreeRegressor │ 1.658  │ 2.05     │ (1.308, 2.0)     │
├────┼─────────┼─────────────┼───────────────────────┼────────┼──────────┼──────────────────┤
│ 1  │ NV      │ 6           │ DecisionTreeRegressor │ -2.136 │ -2.4     │ (-2.556, -1.706) │
├────┼─────────┼─────────────┼───────────────────────┼────────┼──────────┼──────────────────┤
│ 2  │ WI      │ 10          │ DecisionTreeRegressor │ 0.678  │ 1.3      │ (0.445, 0.919)   │
├────┼─────────┼─────────────┼───────────────────────┼────────┼──────────┼──────────────────┤
│ 3  │ MI      │ 15          │ DecisionTreeRegressor │ -1.491 │ -1.8     │ (-1.787, -1.185) │
├────┼─────────┼─────────────┼───────────────────────┼──────

In [12]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'ExtraTreesRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═════════════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model               │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═════════════════════╪════════╪══════════╪══════════════════╡
│ 7  │ AZ      │ 11          │ ExtraTreesRegressor │ 1.061  │ 0.09     │ (0.723, 1.396)   │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼──────────────────┤
│ 8  │ NV      │ 6           │ ExtraTreesRegressor │ -1.62  │ -1.615   │ (-1.698, -1.537) │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼──────────────────┤
│ 9  │ WI      │ 10          │ ExtraTreesRegressor │ 1.037  │ 0.983    │ (0.937, 1.137)   │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼──────────────────┤
│ 10 │ MI      │ 15          │ ExtraTreesRegressor │ -0.275 │ -0.122   │ (-0.407, -0.138) │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼────────

In [13]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'RandomForestRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═══════════════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model                 │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═══════════════════════╪════════╪══════════╪══════════════════╡
│ 14 │ AZ      │ 11          │ RandomForestRegressor │ 0.872  │ 0.4      │ (0.642, 1.122)   │
├────┼─────────┼─────────────┼───────────────────────┼────────┼──────────┼──────────────────┤
│ 15 │ NV      │ 6           │ RandomForestRegressor │ -0.837 │ -1.108   │ (-0.952, -0.726) │
├────┼─────────┼─────────────┼───────────────────────┼────────┼──────────┼──────────────────┤
│ 16 │ WI      │ 10          │ RandomForestRegressor │ 0.186  │ 0.229    │ (0.103, 0.267)   │
├────┼─────────┼─────────────┼───────────────────────┼────────┼──────────┼──────────────────┤
│ 17 │ MI      │ 15          │ RandomForestRegressor │ 0.022  │ 0.12     │ (-0.038, 0.083)  │
├────┼─────────┼─────────────┼───────────────────────┼──────

In [14]:
res = pd.DataFrame(columns=['Model', 'REP', 'DEM'])

for model in [dt_model, et_model, rf_model]:
     REP_delegates, DEM_delegates = predict_election(delegates_preds, model)
     res = pd.concat([res, pd.DataFrame([[model.__class__.__name__, REP_delegates, DEM_delegates]], columns=['Model', 'REP', 'DEM'])], ignore_index=True)

res.to_csv('election_preds.csv', index=False, mode='a', header=False)

print(tabulate(res, headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═══════════════════════╤═══════╤═══════╕
│    │ Model                 │ REP   │ DEM   │
╞════╪═══════════════════════╪═══════╪═══════╡
│ 0  │ DecisionTreeRegressor │ 291   │ 247   │
├────┼───────────────────────┼───────┼───────┤
│ 1  │ ExtraTreesRegressor   │ 291   │ 247   │
├────┼───────────────────────┼───────┼───────┤
│ 2  │ RandomForestRegressor │ 306   │ 232   │
╘════╧═══════════════════════╧═══════╧═══════╛


In [15]:
for model in [dt_model, et_model, rf_model]:
     fig = plot_election_results(delegates_preds, model)

     os.makedirs('./images/png', exist_ok=True)
     fig.write_image(f'./images/png/{model.__class__.__name__}.png', width=1200, height=800)

     print(f"Saved {model.__class__.__name__} plot to ./images/png/{model.__class__.__name__}.png")

Saved DecisionTreeRegressor plot to ./images/png/DecisionTreeRegressor.png
Saved ExtraTreesRegressor plot to ./images/png/ExtraTreesRegressor.png
Saved RandomForestRegressor plot to ./images/png/RandomForestRegressor.png


# MLP

In [16]:
# mlp model

from sklearn.neural_network import MLPRegressor

mlp_model = MLPRegressor(hidden_layer_sizes=(16, 12), max_iter=1000, random_state=24)

model_name, mae, mse, rmse = train_and_evaluate_model(mlp_model, X_train, y_train, X_val, y_val)


pd.DataFrame([[model_name, mae, mse, rmse]], columns=['Model', 'MAE', 'MSE', 'RMSE']).to_csv('error_rates.csv', index=False, mode='a', header=False)

print(tabulate([[model_name, mae, mse, rmse]], headers=['Model', 'MAE', 'MSE', 'RMSE'], tablefmt='fancy_grid', numalign='left'))

╒══════════════╤═══════╤═══════╤════════╕
│ Model        │ MAE   │ MSE   │ RMSE   │
╞══════════════╪═══════╪═══════╪════════╡
│ MLPRegressor │ 1.501 │ 3.175 │ 1.782  │
╘══════════════╧═══════╧═══════╧════════╛


In [17]:
delegates_preds = pd.DataFrame(columns=['State', 'Model', 'Mean', 'Median', 'CI'])

results = predict_swing_states(mlp_model)

delegates_preds = pd.concat([delegates_preds, results], ignore_index=True)

delegates_preds['Delegates'] = delegates_preds['State'].map(delegates_num_map)

delegates_preds = delegates_preds[['State', 'Delegates', 'Model', 'Mean', 'Median', 'CI']]

delegates_preds.to_csv('delegates_preds.csv', index=False, mode='a', header=False)

print(tabulate(delegates_preds[delegates_preds['Model'] == 'MLPRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤══════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model        │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪══════════════╪════════╪══════════╪══════════════════╡
│ 0  │ AZ      │ 11          │ MLPRegressor │ 1.08   │ 0.084    │ (0.753, 1.419)   │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 1  │ NV      │ 6           │ MLPRegressor │ -2.825 │ -2.418   │ (-3.123, -2.523) │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 2  │ WI      │ 10          │ MLPRegressor │ 1.002  │ 0.665    │ (0.787, 1.211)   │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 3  │ MI      │ 15          │ MLPRegressor │ -0.999 │ -1.236   │ (-1.259, -0.728) │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 4  │ PA      │ 19          │ MLPRegressor │ -0.306 │ -0.324   │

In [18]:
res = pd.DataFrame(columns=['Model', 'REP', 'DEM'])

REP_delegates, DEM_delegates = predict_election(delegates_preds, mlp_model)

res = pd.concat([res, pd.DataFrame([['MLPRegressor', REP_delegates, DEM_delegates]], columns=['Model', 'REP', 'DEM'])], ignore_index=True)

res.to_csv('election_preds.csv', index=False, mode='a', header=False)

print(tabulate(res, headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤══════════════╤═══════╤═══════╕
│    │ Model        │ REP   │ DEM   │
╞════╪══════════════╪═══════╪═══════╡
│ 0  │ MLPRegressor │ 272   │ 266   │
╘════╧══════════════╧═══════╧═══════╛


In [19]:
fig = plot_election_results(delegates_preds, mlp_model)

os.makedirs('./images/png', exist_ok=True)

fig.write_image(f'./images/png/MLPRegressor.png', width=1200, height=800)

print(f"Saved MLPRegressor plot to ./images/png/MLPRegressor.png")

Saved MLPRegressor plot to ./images/png/MLPRegressor.png


# Boosting

In [20]:
from xgboost import XGBRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor

xgb_model = XGBRegressor()
ada_model = AdaBoostRegressor()
gb_model = GradientBoostingRegressor()

res_table = []

for model in [xgb_model, ada_model, gb_model]:
     model_name, mae, mse, rmse = train_and_evaluate_model(model, X_train, y_train, X_val, y_val)
     res_table.append([model_name, mae, mse, rmse])

pd.DataFrame(res_table, columns=['Model', 'MAE', 'MSE', 'RMSE']).to_csv('error_rates.csv', index=False, mode='a', header=False)

print(tabulate(res_table, headers=['Model', 'MAE', 'MSE', 'RMSE'], tablefmt='fancy_grid', numalign='left'))

╒═══════════════════════════╤═══════╤═══════╤════════╕
│ Model                     │ MAE   │ MSE   │ RMSE   │
╞═══════════════════════════╪═══════╪═══════╪════════╡
│ XGBRegressor              │ 1.481 │ 3.517 │ 1.875  │
├───────────────────────────┼───────┼───────┼────────┤
│ AdaBoostRegressor         │ 1.592 │ 3.524 │ 1.877  │
├───────────────────────────┼───────┼───────┼────────┤
│ GradientBoostingRegressor │ 1.524 │ 3.263 │ 1.806  │
╘═══════════════════════════╧═══════╧═══════╧════════╛


In [21]:
delegates_preds = pd.DataFrame(columns=['State', 'Model', 'Mean', 'Median', 'CI'])

for model in [xgb_model, ada_model, gb_model]:
     results = predict_swing_states(model)
     delegates_preds = pd.concat([delegates_preds, results], ignore_index=True)

delegates_preds['Delegates'] = delegates_preds['State'].map(delegates_num_map)

delegates_preds = delegates_preds[['State', 'Delegates', 'Model', 'Mean', 'Median', 'CI']]

delegates_preds.to_csv('delegates_preds.csv', index=False, mode='a', header=False)

print(tabulate(delegates_preds[delegates_preds['Model'] == 'XGBRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤══════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model        │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪══════════════╪════════╪══════════╪══════════════════╡
│ 0  │ AZ      │ 11          │ XGBRegressor │ 0.757  │ 0.492    │ (0.501, 1.023)   │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 1  │ NV      │ 6           │ XGBRegressor │ -1.172 │ -1.931   │ (-1.416, -0.909) │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 2  │ WI      │ 10          │ XGBRegressor │ 0.151  │ 0.115    │ (0.019, 0.291)   │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 3  │ MI      │ 15          │ XGBRegressor │ -0.52  │ -0.587   │ (-0.63, -0.403)  │
├────┼─────────┼─────────────┼──────────────┼────────┼──────────┼──────────────────┤
│ 4  │ PA      │ 19          │ XGBRegressor │ 0.268  │ 0.282    │

In [22]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'AdaBoostRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═══════════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model             │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═══════════════════╪════════╪══════════╪══════════════════╡
│ 7  │ AZ      │ 11          │ AdaBoostRegressor │ 1.169  │ 0.664    │ (0.959, 1.397)   │
├────┼─────────┼─────────────┼───────────────────┼────────┼──────────┼──────────────────┤
│ 8  │ NV      │ 6           │ AdaBoostRegressor │ -0.315 │ -0.733   │ (-0.466, -0.169) │
├────┼─────────┼─────────────┼───────────────────┼────────┼──────────┼──────────────────┤
│ 9  │ WI      │ 10          │ AdaBoostRegressor │ 0.328  │ 0.3      │ (0.219, 0.434)   │
├────┼─────────┼─────────────┼───────────────────┼────────┼──────────┼──────────────────┤
│ 10 │ MI      │ 15          │ AdaBoostRegressor │ 0.184  │ 0.257    │ (0.061, 0.311)   │
├────┼─────────┼─────────────┼───────────────────┼────────┼──────────┼──────────────────┤
│ 11 │ PA 

In [23]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'GradientBoostingRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═══════════════════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model                     │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═══════════════════════════╪════════╪══════════╪══════════════════╡
│ 14 │ AZ      │ 11          │ GradientBoostingRegressor │ 0.796  │ 0.672    │ (0.566, 1.03)    │
├────┼─────────┼─────────────┼───────────────────────────┼────────┼──────────┼──────────────────┤
│ 15 │ NV      │ 6           │ GradientBoostingRegressor │ -1.946 │ -2.133   │ (-2.063, -1.832) │
├────┼─────────┼─────────────┼───────────────────────────┼────────┼──────────┼──────────────────┤
│ 16 │ WI      │ 10          │ GradientBoostingRegressor │ 0.084  │ 0.004    │ (0.027, 0.144)   │
├────┼─────────┼─────────────┼───────────────────────────┼────────┼──────────┼──────────────────┤
│ 17 │ MI      │ 15          │ GradientBoostingRegressor │ -0.469 │ -0.422   │ (-0.534, -0.404) │
├────┼─────────┼────

In [24]:
res = pd.DataFrame(columns=['Model', 'REP', 'DEM'])

for model in [xgb_model, ada_model, gb_model]:
     REP_delegates, DEM_delegates = predict_election(delegates_preds, model)
     res = pd.concat([res, pd.DataFrame([[model.__class__.__name__, REP_delegates, DEM_delegates]], columns=['Model', 'REP', 'DEM'])], ignore_index=True)

res.to_csv('election_preds.csv', index=False, mode='a', header=False)

print(tabulate(res, headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═══════════════════════════╤═══════╤═══════╕
│    │ Model                     │ REP   │ DEM   │
╞════╪═══════════════════════════╪═══════╪═══════╡
│ 0  │ XGBRegressor              │ 291   │ 247   │
├────┼───────────────────────────┼───────┼───────┤
│ 1  │ AdaBoostRegressor         │ 306   │ 232   │
├────┼───────────────────────────┼───────┼───────┤
│ 2  │ GradientBoostingRegressor │ 291   │ 247   │
╘════╧═══════════════════════════╧═══════╧═══════╛


In [25]:
for model in [xgb_model, ada_model, gb_model]:
     fig = plot_election_results(delegates_preds, model)

     os.makedirs('./images/png', exist_ok=True)
     fig.write_image(f'./images/png/{model.__class__.__name__}.png', width=1200, height=800)

     print(f"Saved {model.__class__.__name__} plot to ./images/png/{model.__class__.__name__}.png")

Saved XGBRegressor plot to ./images/png/XGBRegressor.png
Saved AdaBoostRegressor plot to ./images/png/AdaBoostRegressor.png
Saved GradientBoostingRegressor plot to ./images/png/GradientBoostingRegressor.png


# Binary models

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

nb_model = GaussianNB()
knn_model = KNeighborsRegressor()
svm_model = SVR()

res_table = []

y_train_binned = y_train.copy().astype(int)
y_val_binned = y_val.copy().astype(int)

for model in [nb_model, knn_model, svm_model]:
     model_name, mae, mse, rmse = train_and_evaluate_model(model, X_train, y_train_binned, X_val, y_val_binned)
     res_table.append([model_name, mae, mse, rmse])

pd.DataFrame(res_table, columns=['Model', 'MAE', 'MSE', 'RMSE']).to_csv('error_rates.csv', index=False, mode='a', header=False)

print(tabulate(res_table, headers=['Model', 'MAE', 'MSE', 'RMSE'], tablefmt='fancy_grid', numalign='left'))

╒═════════════════════╤═══════╤════════╤════════╕
│ Model               │ MAE   │ MSE    │ RMSE   │
╞═════════════════════╪═══════╪════════╪════════╡
│ GaussianNB          │ 2.812 │ 11.062 │ 3.326  │
├─────────────────────┼───────┼────────┼────────┤
│ KNeighborsRegressor │ 1.55  │ 3.495  │ 1.869  │
├─────────────────────┼───────┼────────┼────────┤
│ SVR                 │ 1.846 │ 4.624  │ 2.15   │
╘═════════════════════╧═══════╧════════╧════════╛


In [28]:
delegates_preds = pd.DataFrame(columns=['State', 'Model', 'Mean', 'Median', 'CI'])

for model in [nb_model, knn_model, svm_model]:
     results = predict_swing_states(model)
     delegates_preds = pd.concat([delegates_preds, results], ignore_index=True)

delegates_preds['Delegates'] = delegates_preds['State'].map(delegates_num_map)

delegates_preds = delegates_preds[['State', 'Delegates', 'Model', 'Mean', 'Median', 'CI']]

delegates_preds.to_csv('delegates_preds.csv', index=False, mode='a', header=False)

print(tabulate(delegates_preds[delegates_preds['Model'] == 'GaussianNB'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤════════════╤════════╤══════════╤════════════════╕
│    │ State   │ Delegates   │ Model      │ Mean   │ Median   │ CI             │
╞════╪═════════╪═════════════╪════════════╪════════╪══════════╪════════════════╡
│ 0  │ AZ      │ 11          │ GaussianNB │ 3.216  │ 3        │ (2.74, 3.71)   │
├────┼─────────┼─────────────┼────────────┼────────┼──────────┼────────────────┤
│ 1  │ NV      │ 6           │ GaussianNB │ 1.632  │ 2        │ (1.31, 1.96)   │
├────┼─────────┼─────────────┼────────────┼────────┼──────────┼────────────────┤
│ 2  │ WI      │ 10          │ GaussianNB │ 3.677  │ 4        │ (3.24, 4.12)   │
├────┼─────────┼─────────────┼────────────┼────────┼──────────┼────────────────┤
│ 3  │ MI      │ 15          │ GaussianNB │ -2.134 │ -2       │ (-2.45, -1.84) │
├────┼─────────┼─────────────┼────────────┼────────┼──────────┼────────────────┤
│ 4  │ PA      │ 19          │ GaussianNB │ 2.76   │ 3        │ (2.35, 3.19)   │
├────┼─────────┼────────────

In [29]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'KNeighborsRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═════════════════════╤════════╤══════════╤════════════════╕
│    │ State   │ Delegates   │ Model               │ Mean   │ Median   │ CI             │
╞════╪═════════╪═════════════╪═════════════════════╪════════╪══════════╪════════════════╡
│ 7  │ AZ      │ 11          │ KNeighborsRegressor │ 1.706  │ 1.3      │ (1.4, 2.014)   │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼────────────────┤
│ 8  │ NV      │ 6           │ KNeighborsRegressor │ 0.597  │ 0        │ (0.3, 0.924)   │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼────────────────┤
│ 9  │ WI      │ 10          │ KNeighborsRegressor │ 0.293  │ 0.4      │ (0.128, 0.454) │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼────────────────┤
│ 10 │ MI      │ 15          │ KNeighborsRegressor │ 0.345  │ 0.3      │ (0.006, 0.66)  │
├────┼─────────┼─────────────┼─────────────────────┼────────┼──────────┼────────────────┤
│ 11 │ PA 

In [30]:
print(tabulate(delegates_preds[delegates_preds['Model'] == 'SVR'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═════════╤════════╤══════════╤═════════════════╕
│    │ State   │ Delegates   │ Model   │ Mean   │ Median   │ CI              │
╞════╪═════════╪═════════════╪═════════╪════════╪══════════╪═════════════════╡
│ 14 │ AZ      │ 11          │ SVR     │ 1.776  │ 0.728    │ (1.436, 2.145)  │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼─────────────────┤
│ 15 │ NV      │ 6           │ SVR     │ 0.407  │ -0.639   │ (0.07, 0.764)   │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼─────────────────┤
│ 16 │ WI      │ 10          │ SVR     │ 0.492  │ 0.515    │ (0.33, 0.647)   │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼─────────────────┤
│ 17 │ MI      │ 15          │ SVR     │ 0.036  │ -0.118   │ (-0.175, 0.243) │
├────┼─────────┼─────────────┼─────────┼────────┼──────────┼─────────────────┤
│ 18 │ PA      │ 19          │ SVR     │ 0.363  │ -0.034   │ (0.183, 0.55)   │
├────┼─────────┼─────────────┼─────────┼────────┼───

In [31]:
res = pd.DataFrame(columns=['Model', 'REP', 'DEM'])

for model in [nb_model, knn_model, svm_model]:
     REP_delegates, DEM_delegates = predict_election(delegates_preds, model)
     res = pd.concat([res, pd.DataFrame([[model.__class__.__name__, REP_delegates, DEM_delegates]], columns=['Model', 'REP', 'DEM'])])

res.to_csv('election_preds.csv', index=False, mode='a', header=False)

print(tabulate(res, headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════════════════╤═══════╤═══════╕
│    │ Model               │ REP   │ DEM   │
╞════╪═════════════════════╪═══════╪═══════╡
│ 0  │ GaussianNB          │ 297   │ 241   │
├────┼─────────────────────┼───────┼───────┤
│ 0  │ KNeighborsRegressor │ 312   │ 226   │
├────┼─────────────────────┼───────┼───────┤
│ 0  │ SVR                 │ 312   │ 226   │
╘════╧═════════════════════╧═══════╧═══════╛


# Voting Regressor

In [None]:
from sklearn.ensemble import VotingRegressor

voting_model = VotingRegressor(estimators=[
     ('lr', lr_model),
     ('lasso', lasso_model),
     ('ridge', ridge_model),
     ('dt', dt_model),
     ('et', et_model),
     ('rf', rf_model),
     ('mlp_model', mlp_model),
     ('xgb', xgb_model),
     ('ada', ada_model),
     ('gb', gb_model),
     ('mlp', mlp_model),
     ('knn', knn_model),
     ('svm', svm_model)
], weights = [1.8, 0.3, 1.8, 2, 1.5, 2, 5, 1.5, 0.5, 1.3, 5, 0.5, 0.5])

model_name, mae, mse, rmse = train_and_evaluate_model(voting_model, X_train, y_train, X_val, y_val)

pd.DataFrame([[model_name, mae, mse, rmse]], columns=['Model', 'MAE', 'MSE', 'RMSE']).to_csv('error_rates.csv', index=False, mode='a', header=False)

print(tabulate([[model_name, mae, mse, rmse]], headers=['Model', 'MAE', 'MSE', 'RMSE'], tablefmt='fancy_grid', numalign='left'))

╒═════════════════╤═══════╤═══════╤════════╕
│ Model           │ MAE   │ MSE   │ RMSE   │
╞═════════════════╪═══════╪═══════╪════════╡
│ VotingRegressor │ 1.203 │ 2.226 │ 1.492  │
╘═════════════════╧═══════╧═══════╧════════╛


In [33]:
delegates_preds = pd.DataFrame(columns=['State', 'Model', 'Mean', 'Median', 'CI'])

results = predict_swing_states(voting_model)

delegates_preds = pd.concat([delegates_preds, results], ignore_index=True)

delegates_preds['Delegates'] = delegates_preds['State'].map(delegates_num_map)

delegates_preds = delegates_preds[['State', 'Delegates', 'Model', 'Mean', 'Median', 'CI']]

delegates_preds.to_csv('delegates_preds.csv', index=False, mode='a', header=False)

print(tabulate(delegates_preds[delegates_preds['Model'] == 'VotingRegressor'], headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════╤═════════════╤═════════════════╤════════╤══════════╤══════════════════╕
│    │ State   │ Delegates   │ Model           │ Mean   │ Median   │ CI               │
╞════╪═════════╪═════════════╪═════════════════╪════════╪══════════╪══════════════════╡
│ 0  │ AZ      │ 11          │ VotingRegressor │ 1.008  │ 0.499    │ (0.777, 1.242)   │
├────┼─────────┼─────────────┼─────────────────┼────────┼──────────┼──────────────────┤
│ 1  │ NV      │ 6           │ VotingRegressor │ -2.123 │ -2.287   │ (-2.303, -1.94)  │
├────┼─────────┼─────────────┼─────────────────┼────────┼──────────┼──────────────────┤
│ 2  │ WI      │ 10          │ VotingRegressor │ 0.776  │ 0.794    │ (0.645, 0.904)   │
├────┼─────────┼─────────────┼─────────────────┼────────┼──────────┼──────────────────┤
│ 3  │ MI      │ 15          │ VotingRegressor │ -0.675 │ -0.786   │ (-0.828, -0.519) │
├────┼─────────┼─────────────┼─────────────────┼────────┼──────────┼──────────────────┤
│ 4  │ PA      │ 19          │ V

In [34]:
res = pd.DataFrame(columns=['Model', 'REP', 'DEM'])

REP_delegates, DEM_delegates = predict_election(delegates_preds, voting_model)

res = pd.concat([res, pd.DataFrame([['VotingRegressor', REP_delegates, DEM_delegates]], columns=['Model', 'REP', 'DEM'])])

res.to_csv('election_preds.csv', index=False, mode='a', header=False)

print(tabulate(res, headers='keys', tablefmt='fancy_grid', numalign='left'))

╒════╤═════════════════╤═══════╤═══════╕
│    │ Model           │ REP   │ DEM   │
╞════╪═════════════════╪═══════╪═══════╡
│ 0  │ VotingRegressor │ 291   │ 247   │
╘════╧═════════════════╧═══════╧═══════╛


In [36]:
fig = plot_election_results(delegates_preds, voting_model)

os.makedirs('./images/png', exist_ok=True)

fig.write_image(f'./images/png/VotingRegressor.png', width=1200, height=800)

<div style="display: flex; justify-content: space-around;">
    <img src="./images/png/VotingRegressor.png" alt="Image 1" style="width: 45%;"/>
    <img src="./images/png/VotingRegressor.png" alt="Image 2" style="width: 45%;"/>
</div>
