This notebook uses the optimized neural network model to generate election outcome predictions for the 2023 county dataset.

Please note: The loading and fitting of this algorithm were conducted remotely via Google Colab. As such, the 2023 county data was uploaded into memory for the session, as were the model itself ("Optimized_Model.keras") and the model's scaler ("Optimized_Model_Scaler.pkl"). The cleaned 2023 county data can be found within the repository's 'Resource' directory, in the file titled "2023_County_Final_With_Segments.csv".

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
!pip install keras-tuner
import keras_tuner as kt
import joblib


Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [2]:
# Import the optimized neural network model and its scaler.
# The model is only used for inference (`predict`) in this notebook, so it is
# loaded with compile=False: the saved optimizer/loss configuration is not
# restored, which should avoid the optimizer variable-loading warning that
# Keras emits when the full training state is loaded.
loaded_model = tf.keras.models.load_model('Optimized_Model.keras', compile=False)

# Load the saved scaler
# NOTE(security): joblib.load unpickles the file and can execute arbitrary code;
# only load scaler files that come from a trusted source.
scaler = joblib.load('Optimized_Model_Scaler.pkl')


  saveable.load_own_variables(weights_store.get(inner_path))


In [3]:
# Read the 2023 county dataset from CSV and display it
county_csv_2023 = "2023_County_Final_With_Segments.csv"
county_data_2023 = pd.read_csv(county_csv_2023)
county_data_2023


Unnamed: 0,Year,County,% Male,% Female,% Male > 18 Years Old,% Female > 18 Years Old,% White,% Black or African American,% American Indian and Alaska Native,% Asian,...,% Nonveteran Population 75 Years Old & Over,Nonveteran Labor Force Participation Rate (%),Nonveteran Unemployment Rate (%),% Nonveteran Population With Income Below Poverty Level (Past 12 Months),% Nonveteran Population With Any Disability,Birth Rate Per 1000 Women (15-19 Years Old),Birth Rate Per 1000 Women (20-34 Years Old),Birth Rate Per 1000 Women (35-50 Years Old),Received Public Assistance Income Rate Per 1000 Women (Past 12 Months),Segment
0,2023,ADAMS,0.492,0.508,0.49,0.51,0.882,0.013,0.004,0.008,...,0.096,0.821,0.035,0.079,0.141,0.0,94.0,49.0,97.555556,2
1,2023,ALLEGHENY,0.488,0.512,0.482,0.518,0.759,0.125,0.001,0.042,...,0.09,0.811,0.039,0.11,0.144,9.0,79.0,35.0,189.0,0
2,2023,ARMSTRONG,0.499,0.501,0.497,0.503,0.951,0.006,0.0,0.001,...,0.103,0.769,0.027,0.106,0.186,30.0,123.0,9.0,68.0,1
3,2023,BEAVER,0.495,0.505,0.487,0.513,0.859,0.054,0.002,0.003,...,0.102,0.801,0.027,0.1,0.174,0.0,67.0,26.0,222.0,2
4,2023,BERKS,0.495,0.505,0.49,0.51,0.692,0.041,0.007,0.014,...,0.084,0.786,0.052,0.104,0.155,6.0,102.0,45.0,57.0,2
5,2023,BLAIR,0.497,0.503,0.492,0.508,0.905,0.024,0.0,0.008,...,0.102,0.757,0.032,0.147,0.189,0.0,64.0,49.0,0.0,1
6,2023,BUCKS,0.495,0.505,0.49,0.51,0.809,0.032,0.002,0.05,...,0.096,0.823,0.036,0.066,0.129,0.0,84.0,30.0,199.0,0
7,2023,BUTLER,0.498,0.502,0.494,0.506,0.928,0.009,0.003,0.014,...,0.092,0.781,0.043,0.087,0.145,0.0,82.0,34.0,0.0,2
8,2023,CAMBRIA,0.498,0.502,0.494,0.506,0.906,0.032,0.0,0.004,...,0.105,0.748,0.042,0.12,0.216,7.0,107.0,39.0,27.0,1
9,2023,CARBON,0.51,0.49,0.508,0.492,0.872,0.023,0.016,0.007,...,0.096,0.773,0.065,0.118,0.2,6.589744,89.384615,32.102564,97.555556,1


In [4]:
# Drop the categorical 'County' information column to adjust the array's shape to match proper input dimensions within the model.
# The 'winning_party D:0 R:1' column that a later cell adds to `county_data_2023` in place
# is also removed when present, so re-running this cell after the enrichment step
# still yields the same model inputs instead of feeding the predicted label back in.
county_data_2023_no_county = (
    county_data_2023
    .drop(columns='County')
    .drop(columns='winning_party D:0 R:1', errors='ignore')
)
county_data_2023_no_county


Unnamed: 0,Year,% Male,% Female,% Male > 18 Years Old,% Female > 18 Years Old,% White,% Black or African American,% American Indian and Alaska Native,% Asian,% High School Graduate (18-24),...,% Nonveteran Population 75 Years Old & Over,Nonveteran Labor Force Participation Rate (%),Nonveteran Unemployment Rate (%),% Nonveteran Population With Income Below Poverty Level (Past 12 Months),% Nonveteran Population With Any Disability,Birth Rate Per 1000 Women (15-19 Years Old),Birth Rate Per 1000 Women (20-34 Years Old),Birth Rate Per 1000 Women (35-50 Years Old),Received Public Assistance Income Rate Per 1000 Women (Past 12 Months),Segment
0,2023,0.492,0.508,0.49,0.51,0.882,0.013,0.004,0.008,0.398,...,0.096,0.821,0.035,0.079,0.141,0.0,94.0,49.0,97.555556,2
1,2023,0.488,0.512,0.482,0.518,0.759,0.125,0.001,0.042,0.374,...,0.09,0.811,0.039,0.11,0.144,9.0,79.0,35.0,189.0,0
2,2023,0.499,0.501,0.497,0.503,0.951,0.006,0.0,0.001,0.471,...,0.103,0.769,0.027,0.106,0.186,30.0,123.0,9.0,68.0,1
3,2023,0.495,0.505,0.487,0.513,0.859,0.054,0.002,0.003,0.431,...,0.102,0.801,0.027,0.1,0.174,0.0,67.0,26.0,222.0,2
4,2023,0.495,0.505,0.49,0.51,0.692,0.041,0.007,0.014,0.473,...,0.084,0.786,0.052,0.104,0.155,6.0,102.0,45.0,57.0,2
5,2023,0.497,0.503,0.492,0.508,0.905,0.024,0.0,0.008,0.387,...,0.102,0.757,0.032,0.147,0.189,0.0,64.0,49.0,0.0,1
6,2023,0.495,0.505,0.49,0.51,0.809,0.032,0.002,0.05,0.395,...,0.096,0.823,0.036,0.066,0.129,0.0,84.0,30.0,199.0,0
7,2023,0.498,0.502,0.494,0.506,0.928,0.009,0.003,0.014,0.487,...,0.092,0.781,0.043,0.087,0.145,0.0,82.0,34.0,0.0,2
8,2023,0.498,0.502,0.494,0.506,0.906,0.032,0.0,0.004,0.359,...,0.105,0.748,0.042,0.12,0.216,7.0,107.0,39.0,27.0,1
9,2023,0.51,0.49,0.508,0.492,0.872,0.023,0.016,0.007,0.541,...,0.096,0.773,0.065,0.118,0.2,6.589744,89.384615,32.102564,97.555556,1


In [5]:
# Scale the 2023 county data using the pre-trained scaler from pre-trained optimized model.
# `scaler` is the object loaded from 'Optimized_Model_Scaler.pkl'; only transform() is
# called on it here, no fit call is made in this cell.
# NOTE(review): presumably the input columns must match, in order, what the scaler was
# fitted on — confirm against the training notebook.
county_data_2023_scaled = scaler.transform(county_data_2023_no_county)


In [6]:
# Use the model to generate binary classifier predictions for our 2023 county data.
# The scaled features are passed to the loaded model, and the result is left as the
# cell's last expression so that it is displayed.
# The following cell treats these values as probabilities and thresholds them at 0.5.
predictions = loaded_model.predict(county_data_2023_scaled)
predictions


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step


array([[9.70640123e-01],
       [8.05732654e-08],
       [1.00000000e+00],
       [8.22325647e-01],
       [3.40105407e-03],
       [9.99651253e-01],
       [1.99324433e-08],
       [3.98554020e-02],
       [1.00000000e+00],
       [1.00000000e+00],
       [1.00950908e-03],
       [6.63155575e-09],
       [1.00000000e+00],
       [9.98186052e-01],
       [1.00000000e+00],
       [1.12160703e-03],
       [3.31239062e-06],
       [9.52363095e-08],
       [1.97967365e-01],
       [1.00000000e+00],
       [9.99958634e-01],
       [9.99991298e-01],
       [2.90348586e-02],
       [5.67630351e-01],
       [1.00000000e+00],
       [2.66924709e-01],
       [1.09168195e-05],
       [6.22951925e-01],
       [9.99993742e-01],
       [9.99995828e-01],
       [6.89870678e-04],
       [7.52033824e-10],
       [1.68916131e-06],
       [1.00000000e+00],
       [7.96800195e-11],
       [9.99999940e-01],
       [1.00000000e+00],
       [2.97777355e-01],
       [9.99983966e-01],
       [6.54804647e-01]],

In [7]:
# As this is a binary classification model and outputs are probabilities, a threshold is applied for conversion to binary class (Threshold at 0.5)
# Democrat = 0
# Republican = 1
# NaN compares False against the threshold, so a missing prediction would be
# silently labelled Democrat (0); fail loudly instead of producing a wrong label.
if np.isnan(predictions).any():
    raise ValueError(
        "Model predictions contain NaN values; check the input data for missing values before classifying."
    )
class_labels = (predictions > 0.5).astype(int)
print("Class Labels:", class_labels)


Class Labels: [[1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [0]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [1]
 [1]
 [0]
 [0]
 [1]
 [1]
 [1]
 [0]
 [0]
 [0]
 [1]
 [0]
 [1]
 [1]
 [0]
 [1]
 [1]]


In [8]:
# Summarise the binary classification results
# Overall number of counties that received a prediction
total_counties = class_labels.size

# Tally each class: label 0 = Democrat, label 1 = Republican
number_of_zeros = (class_labels == 0).sum()
number_of_ones = (class_labels == 1).sum()

for summary_line in (
    f"Total # of Counties: {total_counties}",
    f"Number of Democratic Counties: {number_of_zeros}",
    f"Number of Republican Counties: {number_of_ones}",
):
    print(summary_line)


Total # of Counties: 40
Number of Democratic Counties: 18
Number of Republican Counties: 22


In [9]:
# Attach the model's predicted class (0 = Democrat, 1 = Republican) to the 2023 county data
prediction_column = 'winning_party D:0 R:1'
county_data_2023[prediction_column] = class_labels
county_data_2023


Unnamed: 0,Year,County,% Male,% Female,% Male > 18 Years Old,% Female > 18 Years Old,% White,% Black or African American,% American Indian and Alaska Native,% Asian,...,Nonveteran Labor Force Participation Rate (%),Nonveteran Unemployment Rate (%),% Nonveteran Population With Income Below Poverty Level (Past 12 Months),% Nonveteran Population With Any Disability,Birth Rate Per 1000 Women (15-19 Years Old),Birth Rate Per 1000 Women (20-34 Years Old),Birth Rate Per 1000 Women (35-50 Years Old),Received Public Assistance Income Rate Per 1000 Women (Past 12 Months),Segment,winning_party D:0 R:1
0,2023,ADAMS,0.492,0.508,0.49,0.51,0.882,0.013,0.004,0.008,...,0.821,0.035,0.079,0.141,0.0,94.0,49.0,97.555556,2,1
1,2023,ALLEGHENY,0.488,0.512,0.482,0.518,0.759,0.125,0.001,0.042,...,0.811,0.039,0.11,0.144,9.0,79.0,35.0,189.0,0,0
2,2023,ARMSTRONG,0.499,0.501,0.497,0.503,0.951,0.006,0.0,0.001,...,0.769,0.027,0.106,0.186,30.0,123.0,9.0,68.0,1,1
3,2023,BEAVER,0.495,0.505,0.487,0.513,0.859,0.054,0.002,0.003,...,0.801,0.027,0.1,0.174,0.0,67.0,26.0,222.0,2,1
4,2023,BERKS,0.495,0.505,0.49,0.51,0.692,0.041,0.007,0.014,...,0.786,0.052,0.104,0.155,6.0,102.0,45.0,57.0,2,0
5,2023,BLAIR,0.497,0.503,0.492,0.508,0.905,0.024,0.0,0.008,...,0.757,0.032,0.147,0.189,0.0,64.0,49.0,0.0,1,1
6,2023,BUCKS,0.495,0.505,0.49,0.51,0.809,0.032,0.002,0.05,...,0.823,0.036,0.066,0.129,0.0,84.0,30.0,199.0,0,0
7,2023,BUTLER,0.498,0.502,0.494,0.506,0.928,0.009,0.003,0.014,...,0.781,0.043,0.087,0.145,0.0,82.0,34.0,0.0,2,0
8,2023,CAMBRIA,0.498,0.502,0.494,0.506,0.906,0.032,0.0,0.004,...,0.748,0.042,0.12,0.216,7.0,107.0,39.0,27.0,1,1
9,2023,CARBON,0.51,0.49,0.508,0.492,0.872,0.023,0.016,0.007,...,0.773,0.065,0.118,0.2,6.589744,89.384615,32.102564,97.555556,1,1


In [10]:
# Write the prediction-enriched 2023 county dataset to CSV (row index omitted)
predictions_csv_path = '2023_County_Final_With_Segments_And_Predictions.csv'
county_data_2023.to_csv(predictions_csv_path, index=False)
