In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import keras as kr


In [2]:
biden_v_trump = pd.read_csv('../election_resources/historical_elections.csv')
biden_v_trump.head()

Unnamed: 0,samplesize,party,results,favorability
0,1204.0,d,1.0,-22.0006
1,1553.0,d,1.0,-2.0001
2,838.0,d,1.0,-17.6672
3,1198.0,d,1.0,-20.3339
4,1859.0,d,1.0,-6.667


In [3]:
biden_v_trump.head()

Unnamed: 0,samplesize,party,results,favorability
0,1204.0,d,1.0,-22.0006
1,1553.0,d,1.0,-2.0001
2,838.0,d,1.0,-17.6672
3,1198.0,d,1.0,-20.3339
4,1859.0,d,1.0,-6.667


In [4]:
biden_v_trump.columns

Index(['samplesize', 'party', 'results', 'favorability'], dtype='object')

In [5]:
biden_v_trump.nunique()

samplesize       795
party              2
results            2
favorability    6335
dtype: int64

In [6]:
biden_v_trump.shape

(6443, 4)

In [7]:
biden_v_trump = pd.get_dummies(biden_v_trump, dtype=float)
biden_v_trump.head()

Unnamed: 0,results,favorability,samplesize_1000.0,samplesize_1001.0,samplesize_1002.0,samplesize_1003.0,samplesize_1004.0,samplesize_1005.0,samplesize_1006.0,samplesize_1007.0,...,samplesize_990.0,samplesize_992.0,samplesize_993.0,samplesize_994.0,samplesize_997.0,samplesize_<<<<<<< HEAD,samplesize_=======,samplesize_>>>>>>> c6fdee4765df870028d1ec168a99f34f4635955b,party_d,party_r
0,1.0,-22.0006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1.0,-2.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1.0,-17.6672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,-20.3339,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,1.0,-6.667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [8]:
biden_v_trump.fillna(0, inplace=True)

In [9]:
biden_v_trump.shape

(6443, 799)

In [10]:
y = biden_v_trump['results'].values
X = biden_v_trump.drop(['results'], axis=1).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

In [11]:
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
X_scaled.shape

(4832, 798)

In [13]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()
    activation = hp.Choice('activation', ['tanh', 'sigmoid', 'leaky_relu', 'elu', 'selu', 'PReLU'])
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
                                        min_value=1,
                                        max_value=100,
                                        step=2), activation=activation, input_dim=len(X_scaled[0])))
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
                                        min_value=1,
                                        max_value=100,
                                        step=2), activation=activation))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    loss = hp.Choice('loss', ['binary_crossentropy', 'mse'])
    optimizer = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adamax', 'nadam'])
    nn_model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])
    
    return nn_model

In [14]:
tuner = kt.Hyperband(create_model, objective='val_accuracy', max_epochs=20, hyperband_iterations=10, directory='my_dir2', project_name='intro_to_kt')

In [15]:
tuner.search(X_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 300 Complete [00h 00m 23s]
val_accuracy: 0.9373060464859009

Best val_accuracy So Far: 0.9813780188560486
Total elapsed time: 01h 00m 52s
INFO:tensorflow:Oracle triggered exit


In [16]:
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'selu',
 'first_units': 31,
 'num_layers': 8,
 'units_0': 55,
 'loss': 'mse',
 'optimizer': 'rmsprop',
 'units_1': 99,
 'units_2': 63,
 'units_3': 63,
 'units_4': 75,
 'units_5': 79,
 'units_6': 13,
 'units_7': 17,
 'units_8': 55,
 'units_9': 89,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [17]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

51/51 - 0s - loss: 0.0185 - accuracy: 0.9814 - 396ms/epoch - 8ms/step
Loss: 0.018457189202308655, Accuracy: 0.9813780188560486


In [18]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_scaled[0])
hidden_nodes_layer1 =  55
hidden_nodes_layer2 = 99
hidden_nodes_layer3 = 63
hidden_nodes_layer4 = 63
hidden_nodes_layer5 = 75
hidden_nodes_layer6 = 79
hidden_nodes_layer7 = 13
hidden_nodes_layer8 = 17
hidden_nodes_layer9 = 55
hidden_nodes_layer10 = 89




output_dim = 1



nn_model = tf.keras.models.Sequential()

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="selu"))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer7, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer8, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer9, activation="selu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer10, activation="selu"))







# Output layer
nn_model.add(tf.keras.layers.Dense(output_dim, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 55)                43945     
                                                                 
 dense_11 (Dense)            (None, 99)                5544      
                                                                 
 dense_12 (Dense)            (None, 63)                6300      
                                                                 
 dense_13 (Dense)            (None, 63)                4032      
                                                                 
 dense_14 (Dense)            (None, 75)                4800      
                                                                 
 dense_15 (Dense)            (None, 79)                6004      
                                                                 
 dense_16 (Dense)            (None, 13)               

In [19]:
nn_model.compile(loss='mse', optimizer='rmsprop', metrics=["accuracy"])

In [20]:

# Train the model
fit_model = nn_model.fit(X_scaled, y_train, epochs=100, initial_epoch= 0)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [21]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

51/51 - 0s - loss: 0.0306 - accuracy: 0.9628 - 309ms/epoch - 6ms/step
Loss: 0.03064206801354885, Accuracy: 0.9627560377120972


In [22]:
predictions = nn_model.predict(X_test_scaled)



In [23]:

# Assuming y_true contains the true labels and y_pred contains the predicted labels

#accuracy = accuracy_score(y_test, predictions)
#precision = precision_score(y_test, predictions)
#recall = recall_score(y_test, predictions)
#f1 = f1_score(y_test, predictions)
auc_roc = roc_auc_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)


In [24]:
print(f'AUC-ROC score: {auc_roc}')
print(f'MSE score: {mse}')
print(f'MAE score: {mae}')

AUC-ROC score: 0.9879095031551818
MSE score: 0.030642071625123318
MAE score: 0.037821027717657774


In [25]:
nn_model.save('trained_model_president.h5')