# Prediction using models

In [336]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.linear_model import LinearRegression

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam

### Load datasets

In [337]:
training_df = pd.read_csv('./Training Datasets/training_pres.csv')

## Vote prediction

In [338]:
X_vote = training_df.drop(columns=['GEO_ID', 'democrat_percent'])
y_vote = training_df['democrat_percent']
X_vote_train, X_vote_test, y_vote_train, y_vote_test = train_test_split(X_vote, y_vote, test_size=0.2)

### Linear regression

In [339]:
lin_reg_vote = LinearRegression()
lin_reg_vote.fit(X_vote_train, y_vote_train)

y_vote_pred = lin_reg_vote.predict(X_vote_test)

mse = mean_squared_error(y_vote_test, y_vote_pred)
r2 = r2_score(y_vote_test, y_vote_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

Mean Squared Error: 0.0027225033105916
R^2 Score: 0.5835469985015678


### FFNN

In [348]:
ffnn_vote = Sequential([
    Dense(32, input_dim=X_vote_train.shape[1], activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='linear')
])
# plot_model(ffnn_vote, to_file='model_structure.png', show_shapes=True, show_layer_names=True)
ffnn_vote.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
ffnn_vote.fit(X_vote_train, y_vote_train, epochs=100, batch_size=16, validation_split=0.2, callbacks=[early_stopping])


y_vote_pred = ffnn_vote.predict(X_vote_test)

mse = tf.keras.losses.MeanSquaredError()
print(f'Mean Squared Error: {mse(y_vote_test, y_vote_pred).numpy()}')
print(f'R^2 Score: {r2_score(y_vote_test, y_vote_pred)}')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - loss: 0.7143 - val_loss: 0.5231
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.4614 - val_loss: 0.3009
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.2398 - val_loss: 0.0948
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0562 - val_loss: 0.0177
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0218 - val_loss: 0.0134
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0178 - val_loss: 0.0085
Epoch 7/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0102 - val_loss: 0.0063
Epoch 8/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0101 - val_loss: 0.0052
Epoch 9/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

### Predictions

In [341]:
feature_names = ['White alone',
                 'Black or African American alone',
                 'American Indian and Alaska Native alone',
                 'Asian alone',
                 'Native Hawaiian and Other Pacific Islander alone',
                 'Some Other Race alone',
                 'White; Black or African American',
                 'Black or African American; American Indian and Alaska Native',
                 'White; American Indian and Alaska Native',
                 'White; Asian',
                 'White; Native Hawaiian and Other Pacific Islander',
                 'White; Some Other Race',
                 'Black or African American; Asian',
                 'Black or African American; Native Hawaiian and Other Pacific Islander',
                 'Black or African American; Some Other Race',
                 'American Indian and Alaska Native; Asian',
                 'American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander',
                 'American Indian and Alaska Native; Some Other Race',
                 'Asian; Native Hawaiian and Other Pacific Islander',
                 'Asian; Some Other Race',
                 'Native Hawaiian and Other Pacific Islander; Some Other Race',
                 'Occupied_percent', 
                 'Turnout']
#              W      B     I       A     H       s      WB     BI    WI   WA     WH    Ws   BA    BH     Bs     IA     IH     Is     AH     As     Hs    O    T                                         
vote_2024 = [[0.33, 0.40, 0.003, 0.094, 0.0003, 0.087, 0.012, 0.04, 0.04, 0.09, 0.002, 0.4, 0.015, 0.001, 0.06, 0.002, 0.0001, 0.02, 0.003, 0.007, 0.003, 0.9, 0.6]]
vote_2024 = pd.DataFrame(vote_2024, columns=feature_names)
vote_2024_pred_lin = lin_reg_vote.predict(vote_2024)*100
vote_2024_pred_ffnn = ffnn_vote.predict(vote_2024)*100
print(f'Predicted Democrat Vote Percentage of the Linear Regression Model: {vote_2024_pred_lin[0]:.2f}%')
print(f'Predicted Democrat Vote Percentage of the FFNN: {vote_2024_pred[0][0]:.2f}%')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predicted Democrat Vote Percentage of the Linear Regression Model: 82.36%
Predicted Democrat Vote Percentage of the FFNN: 73.03%


## Turnout

In [342]:
X_turnout = training_df.drop(columns=['GEO_ID', 'Turnout'])
y_turnout = training_df['Turnout']
X_turnout_train, X_turnout_test, y_turnout_train, y_turnout_test = train_test_split(X_turnout, y_turnout, test_size=0.2)

### Linear regression

In [343]:
lin_reg_turnout = LinearRegression()
lin_reg_turnout.fit(X_turnout_train, y_turnout_train)

y_turnout_pred = lin_reg_turnout.predict(X_turnout_test)

mse = mean_squared_error(y_turnout_test, y_turnout_pred)
r2 = r2_score(y_turnout_test, y_turnout_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

Mean Squared Error: 0.005935164694648563
R^2 Score: 0.7462537099494209


### FFNN

In [349]:
ffnn_turnout = Sequential([
    Dense(32, input_dim=X_turnout_train.shape[1], activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='linear')
])
# plot_model(ffnn_turnout, to_file='model_structure.png', show_shapes=True, show_layer_names=True)
ffnn_turnout.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
ffnn_turnout.fit(X_turnout_train, y_turnout_train, epochs=100, batch_size=16, validation_split=0.1, callbacks=[early_stopping])

y_turnout_pred = ffnn_turnout.predict(X_turnout_test)

mse = tf.keras.losses.MeanSquaredError()
print(f'Mean Squared Error: {mse(y_turnout_test, y_turnout_pred).numpy()}')
print(f'R^2 Score: {r2_score(y_turnout_test, y_turnout_pred)}')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.1990 - val_loss: 0.0481
Epoch 2/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0277 - val_loss: 0.0137
Epoch 3/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0154 - val_loss: 0.0085
Epoch 4/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0091 - val_loss: 0.0075
Epoch 5/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0059 - val_loss: 0.0062
Epoch 6/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0068 - val_loss: 0.0052
Epoch 7/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0045 - val_loss: 0.0047
Epoch 8/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0049 - val_loss: 0.0045
Epoch 9/100
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

### Predictions

In [350]:
feature_names = ['White alone',
                 'Black or African American alone',
                 'American Indian and Alaska Native alone',
                 'Asian alone',
                 'Native Hawaiian and Other Pacific Islander alone',
                 'Some Other Race alone',
                 'White; Black or African American',
                 'Black or African American; American Indian and Alaska Native',
                 'White; American Indian and Alaska Native',
                 'White; Asian',
                 'White; Native Hawaiian and Other Pacific Islander',
                 'White; Some Other Race',
                 'Black or African American; Asian',
                 'Black or African American; Native Hawaiian and Other Pacific Islander',
                 'Black or African American; Some Other Race',
                 'American Indian and Alaska Native; Asian',
                 'American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander',
                 'American Indian and Alaska Native; Some Other Race',
                 'Asian; Native Hawaiian and Other Pacific Islander',
                 'Asian; Some Other Race',
                 'Native Hawaiian and Other Pacific Islander; Some Other Race',
                 'Occupied_percent', 
                 'democrat_percent']
#              W      B     I       A     H       s      WB     BI    WI   WA     WH    Ws   BA    BH     Bs     IA     IH     Is     AH     As     Hs    O    T                                         
turnout_2024 = [[0.33, 0.40, 0.003, 0.094, 0.0003, 0.087, 0.012, 0.04, 0.04, 0.09, 0.002, 0.4, 0.015, 0.001, 0.06, 0.002, 0.0001, 0.02, 0.003, 0.007, 0.003, 0.9, 0.8]]
turnout_2024 = pd.DataFrame(turnout_2024, columns=feature_names)
turnout_2024_pred_lin = lin_reg_turnout.predict(turnout_2024)*100
turnout_2024_pred_ffnn = ffnn_turnout.predict(turnout_2024)*100
print(f'Predicted Democrat Vote Percentage of the Linear Regression Model: {turnout_2024_pred_lin[0]:.2f}%')
print(f'Predicted Democrat Vote Percentage of the FFNN: {turnout_2024_pred_ffnn[0][0]:.2f}%')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Predicted Democrat Vote Percentage of the Linear Regression Model: 55.19%
Predicted Democrat Vote Percentage of the FFNN: 66.49%
