In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import keras as kr
import matplotlib.pyplot as plt


In [36]:
# Load the Biden-vs-Trump 2024 poll table from the shared resources folder
# and preview its first five rows.
biden_v_trump24 = pd.read_csv(filepath_or_buffer='../election_resources/biden_v_trump24.csv')
biden_v_trump24.head(n=5)

Unnamed: 0,question_id,end_date,pollster,sample_size,population,politician,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results
0,178646,2023-08-08,YouGov,1500.0,a,Joe Biden,44.0,53.0,20.0,24.0,13.0,40.0,0
1,178647,2023-08-08,YouGov,1178.0,rv,Joe Biden,45.0,53.0,23.0,22.0,11.0,42.0,0
2,178439,2023-08-03,Ipsos,1076.0,a,Joe Biden,33.0,52.0,0.0,0.0,0.0,0.0,0
3,178683,2023-08-03,Cygnal Political,2000.0,lv,Joe Biden,45.9,51.3,25.3,20.7,12.4,38.9,0
4,178365,2023-08-01,YouGov,1500.0,a,Joe Biden,42.0,55.0,18.0,24.0,12.0,43.0,0


In [28]:
# List the column labels of the loaded poll table (inspection only; nothing is modified).
biden_v_trump24.columns

Index(['question_id', 'end_date', 'pollster', 'sample_size', 'population',
       'politician', 'favorable', 'unfavorable', 'very_favorable',
       'somewhat_favorable', 'somewhat_unfavorable', 'very_unfavorable',
       'results'],
      dtype='object')

In [37]:
# Reduce the poll table to the columns that are fed to the model
# (sample_size, politician, favorable, unfavorable) and fill gaps with 0.
#
# NOTE: this removes 'results' and 'pollster' from the frame; any cell that
# needs those columns has to obtain them from somewhere other than
# `biden_v_trump24` once this cell has run.
columns_to_drop = [
    'question_id', 'end_date', 'population', 'very_favorable',
    'somewhat_favorable', 'somewhat_unfavorable', 'very_unfavorable',
    'pollster', 'results',
]

# Non-inplace drop with errors='ignore' keeps the cell idempotent: running it
# a second time no longer raises KeyError for columns that are already gone.
biden_v_trump24 = (
    biden_v_trump24
    .drop(columns=columns_to_drop, errors='ignore')
    .fillna(0)  # missing values in the remaining columns become 0
)
biden_v_trump24.head()

Unnamed: 0,sample_size,politician,favorable,unfavorable
0,1500.0,Joe Biden,44.0,53.0
1,1178.0,Joe Biden,45.0,53.0
2,1076.0,Joe Biden,33.0,52.0
3,2000.0,Joe Biden,45.9,51.3
4,1500.0,Joe Biden,42.0,55.0


In [30]:
# The 'results' column is removed from `biden_v_trump24` by the earlier
# column-dropping cell, so indexing the frame here raises KeyError when the
# notebook is run top to bottom. Read the column straight from the source CSV
# instead; its default RangeIndex lines up row-for-row with the poll table.
results = pd.read_csv(
    '../election_resources/biden_v_trump24.csv',
    usecols=['results'],
)['results']
results

0       0
1       0
2       0
3       0
4       0
       ..
1657    0
1658    0
1659    0
1660    0
1661    0
Name: results, Length: 1662, dtype: int64

In [5]:
# Collapse infrequent pollsters (fewer than 33 polls) into a single 'Other'
# bucket.
#
# The 'pollster' column is dropped by an earlier cell, so this step is guarded:
# on a top-to-bottom run it is a no-op instead of raising KeyError. When the
# column is present, the counts are computed once and all rare pollsters are
# replaced in a single vectorised pass.
pollster_to_replace = []
if 'pollster' in biden_v_trump24.columns:
    pollster_counts = biden_v_trump24['pollster'].value_counts()
    pollster_to_replace = list(pollster_counts[pollster_counts < 33].index)
    is_rare_pollster = biden_v_trump24['pollster'].isin(pollster_to_replace)
    biden_v_trump24['pollster'] = biden_v_trump24['pollster'].mask(is_rare_pollster, 'Other')

In [38]:
# One-hot encode the remaining non-numeric columns as float indicator columns
# and preview the encoded frame.
# NOTE(review): the indicator columns are derived from the categories present
# in this file; presumably they must match the columns used at training time
# - confirm.
biden_v_trump24 = pd.get_dummies(data=biden_v_trump24, dtype=float)
biden_v_trump24.head(n=5)

Unnamed: 0,sample_size,favorable,unfavorable,politician_Donald Trump,politician_Joe Biden
0,1500.0,44.0,53.0,0.0,1.0
1,1178.0,45.0,53.0,0.0,1.0
2,1076.0,33.0,52.0,0.0,1.0
3,2000.0,45.9,51.3,0.0,1.0
4,1500.0,42.0,55.0,0.0,1.0


In [39]:
# Show (rows, columns) of the encoded frame that will be scaled and passed to the model.
biden_v_trump24.shape

(1662, 5)

In [40]:
# Restore the saved Keras network from its .h5 file (path is relative to the
# notebook's working directory) and print its layer/parameter table.
nn_model = tf.keras.models.load_model(filepath='trained_model_president.h5')
nn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 13)                78        
                                                                 
 dense_9 (Dense)             (None, 1)                 14        
                                                                 
 dense_10 (Dense)            (None, 1)                 2         
                                                                 
 dense_11 (Dense)            (None, 1)                 2         
                                                                 
 dense_12 (Dense)            (None, 1)                 2         
                                                                 
 dense_13 (Dense)            (None, 1)                 2         
                                                                 
Total params: 100
Trainable params: 100
Non-trainable 

In [41]:
# Standardise every column of the encoded frame.
# NOTE(review): the scaler is fitted on the very data being predicted, not on
# the data the model was trained on; presumably the training-time scaler should
# be reused here - confirm.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(biden_v_trump24)
X_scaler = scaler  # fit() returns the estimator itself, so both names refer to the fitted scaler


In [46]:
# Display the standardised feature matrix produced by the scaler (inspection only).
X_scaled

array([[-0.12783023,  0.13100861,  0.10212651, -1.00240964,  1.00240964],
       [-0.33224768,  0.3187091 ,  0.10212651, -1.00240964,  1.00240964],
       [-0.39700104, -1.93369671, -0.08158846, -1.00240964,  1.00240964],
       ...,
       [ 0.31655553, -0.24439235,  0.28584147,  0.99759615, -0.99759615],
       [-0.12783023,  0.3187091 ,  0.28584147,  0.99759615, -0.99759615],
       [ 0.31655553, -0.43209284,  0.28584147,  0.99759615, -0.99759615]])

In [42]:
# Run the loaded network over the standardised features.
predictions = nn_model.predict(x=X_scaled)



In [43]:
# Display the raw array returned by nn_model.predict (inspection only).
predictions

array([[0.51173365],
       [0.512522  ],
       [0.49214295],
       ...,
       [0.43396044],
       [0.43636668],
       [0.4333409 ]], dtype=float32)

In [44]:
# Wrap the model output in a DataFrame with a single 'predictions' column.
predictions_df = pd.DataFrame(data=predictions, columns=['predictions'])

In [45]:
# Add the 'results' series next to the predictions (aligned on the index)
# and preview the combined frame.
predictions_df = predictions_df.assign(results=results)
predictions_df.head(n=5)

Unnamed: 0,predictions,results
0,0.511734,0
1,0.512522,0
2,0.492143,0
3,0.513787,0
4,0.506966,0


In [16]:
# Write the predictions/results table to predictions.csv in the working
# directory, leaving the row index out of the file.
predictions_df.to_csv(path_or_buf='predictions.csv', index=False)

In [17]:
# Preview the first rows of the predictions/results table.
# NOTE(review): the output stored with this cell differs from the values shown
# when the frame was previewed earlier in the notebook, so it appears to come
# from a different run - re-execute the notebook top to bottom to refresh it.
predictions_df.head()

Unnamed: 0,predictions,results
0,0.997332,0
1,0.99794,0
2,0.998225,0
3,0.989938,0
4,0.995206,0
