In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import keras as kr


In [2]:
biden_v_trump = pd.read_csv('../election_resources/historical_elections.csv')
biden_v_trump.head()

Unnamed: 0,samplesize,party,results,favorability
0,1204.0,d,1,-22.0006
1,1553.0,d,1,-2.0001
2,838.0,d,1,-17.6672
3,1198.0,d,1,-20.3339
4,1859.0,d,1,-6.667


In [3]:
biden_v_trump.head()

Unnamed: 0,samplesize,party,results,favorability
0,1204.0,d,1,-22.0006
1,1553.0,d,1,-2.0001
2,838.0,d,1,-17.6672
3,1198.0,d,1,-20.3339
4,1859.0,d,1,-6.667


In [4]:
biden_v_trump.columns

Index(['samplesize', 'party', 'results', 'favorability'], dtype='object')

In [5]:
biden_v_trump.nunique()

samplesize       792
party              2
results            2
favorability    6335
dtype: int64

In [6]:
biden_v_trump.shape

(6440, 4)

In [7]:
biden_v_trump = pd.get_dummies(biden_v_trump, dtype=float)
biden_v_trump.head()

Unnamed: 0,samplesize,results,favorability,party_d,party_r
0,1204.0,1,-22.0006,1.0,0.0
1,1553.0,1,-2.0001,1.0,0.0
2,838.0,1,-17.6672,1.0,0.0
3,1198.0,1,-20.3339,1.0,0.0
4,1859.0,1,-6.667,1.0,0.0


In [8]:
biden_v_trump.fillna(0, inplace=True)

In [9]:
biden_v_trump.shape

(6440, 5)

In [10]:
y = biden_v_trump['results'].values
X = biden_v_trump.drop(['results'], axis=1).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

In [11]:
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
X_scaled.shape

(4830, 4)

In [13]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()
    activation = hp.Choice('activation', ['tanh', 'sigmoid', 'leaky_relu', 'elu', 'selu', 'PReLU'])
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
                                        min_value=1,
                                        max_value=20,
                                        step=2), activation=activation, input_dim=len(X_scaled[0])))
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
                                        min_value=1,
                                        max_value=20,
                                        step=2), activation=activation))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    loss = hp.Choice('loss', ['binary_crossentropy', 'mse'])
    optimizer = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adamax', 'nadam'])
    nn_model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])
    
    return nn_model

In [14]:
tuner = kt.Hyperband(create_model, objective='val_accuracy', max_epochs=20, hyperband_iterations=10, directory='my_dir2', project_name='intro_to_kt')

In [15]:
tuner.search(X_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 300 Complete [00h 00m 24s]
val_accuracy: 0.9583851099014282

Best val_accuracy So Far: 0.9788820147514343
Total elapsed time: 00h 46m 29s
INFO:tensorflow:Oracle triggered exit


In [24]:
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'PReLU',
 'first_units': 11,
 'num_layers': 5,
 'units_0': 7,
 'loss': 'binary_crossentropy',
 'optimizer': 'nadam',
 'units_1': 9,
 'units_2': 17,
 'units_3': 3,
 'units_4': 9,
 'units_5': 5,
 'units_6': 9,
 'units_7': 9,
 'units_8': 7,
 'units_9': 15,
 'tuner/epochs': 20,
 'tuner/initial_epoch': 7,
 'tuner/bracket': 1,
 'tuner/round': 1,
 'tuner/trial_id': '0083'}

In [25]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

51/51 - 0s - loss: 0.0961 - accuracy: 0.9789 - 467ms/epoch - 9ms/step
Loss: 0.09612816572189331, Accuracy: 0.9788820147514343


In [13]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_scaled[0])
hidden_nodes_layer1 =  7
hidden_nodes_layer2 = 9
hidden_nodes_layer3 = 17
hidden_nodes_layer4 = 3
hidden_nodes_layer5 = 9



output_dim = 1



nn_model = tf.keras.models.Sequential()

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="PReLU"))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="PReLU"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="PReLU"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="PReLU"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="PReLU"))







# Output layer
nn_model.add(tf.keras.layers.Dense(output_dim, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 7)                 42        
                                                                 
 dense_1 (Dense)             (None, 9)                 81        
                                                                 
 dense_2 (Dense)             (None, 17)                187       
                                                                 
 dense_3 (Dense)             (None, 3)                 57        
                                                                 
 dense_4 (Dense)             (None, 9)                 45        
                                                                 
 dense_5 (Dense)             (None, 1)                 10        
                                                                 
Total params: 422
Trainable params: 422
Non-trainable pa

In [14]:
nn_model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=["accuracy"])

In [15]:

# Train the model
fit_model = nn_model.fit(X_scaled, y_train, epochs=100, initial_epoch= 0)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [16]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

51/51 - 0s - loss: 0.0942 - accuracy: 0.9789 - 423ms/epoch - 8ms/step
Loss: 0.09418128430843353, Accuracy: 0.9788820147514343


In [17]:
predictions = nn_model.predict(X_test_scaled)



In [23]:

# Assuming y_true contains the true labels and y_pred contains the predicted labels

#accuracy = accuracy_score(y_test, predictions)
#precision = precision_score(y_test, predictions)
#recall = recall_score(y_test, predictions)
#f1 = f1_score(y_test, predictions)
auc_roc = roc_auc_score(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)


In [24]:
print(f'AUC-ROC score: {auc_roc}')
print(f'MSE score: {mse}')
print(f'MAE score: {mae}')

AUC-ROC score: 0.989379777132725
MSE score: 0.020185980635714428
MAE score: 0.041322981589324154


In [None]:
nn_model.save('trained_model_president.h5')