In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import keras as kr


In [2]:
biden_v_trump = pd.read_csv('../election_resources/biden_v_trump.csv')
biden_v_trump.head()

Unnamed: 0,question_id,end_date,pollster,sample_size,population,politician,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results
0,114581,2019-12-10,YouGov,1204,rv,Joseph R. Biden Jr.,42.0,52.0,20.0,22.0,14.0,38.0,1
1,114524,2019-12-09,Quinnipiac University,1553,rv,Joseph R. Biden Jr.,44.0,47.0,0.0,0.0,0.0,0.0,1
2,114382,2019-12-08,Monmouth University,838,rv,Joseph R. Biden Jr.,43.0,50.0,18.0,25.0,16.0,34.0,1
3,114188,2019-12-03,YouGov,1198,rv,Joseph R. Biden Jr.,42.0,51.0,20.0,22.0,14.0,37.0,1
4,114336,2019-11-29,Harris Poll,1859,rv,Joseph R. Biden Jr.,44.0,44.0,15.0,29.0,19.0,25.0,1


In [3]:
biden_v_trump.drop(columns=['question_id', 'end_date'], inplace=True)
biden_v_trump.head()

Unnamed: 0,pollster,sample_size,population,politician,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results
0,YouGov,1204,rv,Joseph R. Biden Jr.,42.0,52.0,20.0,22.0,14.0,38.0,1
1,Quinnipiac University,1553,rv,Joseph R. Biden Jr.,44.0,47.0,0.0,0.0,0.0,0.0,1
2,Monmouth University,838,rv,Joseph R. Biden Jr.,43.0,50.0,18.0,25.0,16.0,34.0,1
3,YouGov,1198,rv,Joseph R. Biden Jr.,42.0,51.0,20.0,22.0,14.0,37.0,1
4,Harris Poll,1859,rv,Joseph R. Biden Jr.,44.0,44.0,15.0,29.0,19.0,25.0,1


In [4]:
biden_v_trump.nunique()

pollster                 13
sample_size             112
population                1
politician                2
favorable                21
unfavorable              30
very_favorable           25
somewhat_favorable       26
somewhat_unfavorable     21
very_unfavorable         35
results                   2
dtype: int64

In [5]:
biden_v_trump = pd.get_dummies(biden_v_trump, dtype=float)
biden_v_trump.head()

Unnamed: 0,sample_size,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results,pollster_ABC News/Washington Post,pollster_CNN/SSRS,...,pollster_Monmouth University,pollster_Morning Consult,pollster_NBC News/Wall Street Journal,pollster_Public Policy Polling,pollster_Quinnipiac University,pollster_Suffolk University,pollster_YouGov,population_rv,politician_Donald Trump,politician_Joseph R. Biden Jr.
0,1204,42.0,52.0,20.0,22.0,14.0,38.0,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
1,1553,44.0,47.0,0.0,0.0,0.0,0.0,1,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
2,838,43.0,50.0,18.0,25.0,16.0,34.0,1,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
3,1198,42.0,51.0,20.0,22.0,14.0,37.0,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
4,1859,44.0,44.0,15.0,29.0,19.0,25.0,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0


In [6]:
biden_v_trump.fillna(0, inplace=True)

In [26]:
biden_v_trump.shape

(230, 24)

In [7]:
y = biden_v_trump['results'].values
X = biden_v_trump.drop(['results'], axis=1).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

In [8]:
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
X_scaled.shape

(172, 23)

In [14]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid', 'leaky_relu', 'elu', 'selu'])
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
                                        min_value=1,
                                        max_value=100,
                                        step=2), activation=activation, input_dim=len(X_scaled[0])))
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
                                        min_value=1,
                                        max_value=100,
                                        step=2), activation=activation))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    loss = hp.Choice('loss', ['binary_crossentropy', 'mse'])
    optimizer = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adamax', 'nadam'])
    nn_model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])
    
    return nn_model

In [16]:
tuner = kt.Hyperband(create_model, objective='val_accuracy', max_epochs=20, hyperband_iterations=10, directory='my_dir2', project_name='intro_to_kt')

In [17]:
tuner.search(X_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 300 Complete [00h 00m 03s]
val_accuracy: 0.9655172228813171

Best val_accuracy So Far: 1.0
Total elapsed time: 00h 10m 43s
INFO:tensorflow:Oracle triggered exit


In [18]:
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'elu',
 'first_units': 69,
 'num_layers': 9,
 'units_0': 93,
 'loss': 'mse',
 'optimizer': 'adam',
 'units_1': 77,
 'units_2': 79,
 'units_3': 53,
 'units_4': 97,
 'units_5': 15,
 'units_6': 33,
 'units_7': 31,
 'units_8': 39,
 'units_9': 65,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0}

In [19]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: 0.0122 - accuracy: 1.0000 - 226ms/epoch - 113ms/step
Loss: 0.012227821163833141, Accuracy: 1.0


In [20]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_scaled[0])
hidden_nodes_layer1 =  93
hidden_nodes_layer2 = 77
hidden_nodes_layer3 = 79
hidden_nodes_layer4 = 53
hidden_nodes_layer5 = 97
hidden_nodes_layer6 = 15
hidden_nodes_layer7 = 33
hidden_nodes_layer8 = 31
hidden_nodes_layer9 = 39
hidden_nodes_layer10 = 65

output_dim = 1



nn_model = tf.keras.models.Sequential()

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="elu"))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer7, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer8, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer9, activation="elu"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer10, activation="elu"))





# Output layer
nn_model.add(tf.keras.layers.Dense(output_dim, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_11 (Dense)            (None, 93)                2232      
                                                                 
 dense_12 (Dense)            (None, 77)                7238      
                                                                 
 dense_13 (Dense)            (None, 79)                6162      
                                                                 
 dense_14 (Dense)            (None, 53)                4240      
                                                                 
 dense_15 (Dense)            (None, 97)                5238      
                                                                 
 dense_16 (Dense)            (None, 15)                1470      
                                                                 
 dense_17 (Dense)            (None, 33)               

In [21]:
nn_model.compile(loss="mse", optimizer="adam", metrics=["accuracy"])

In [22]:

# Train the model
fit_model = nn_model.fit(X_scaled, y_train, epochs=100, initial_epoch= 0)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [23]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: 0.0070 - accuracy: 0.9828 - 146ms/epoch - 73ms/step
Loss: 0.007026957347989082, Accuracy: 0.982758641242981


In [24]:
nn_model.predict(X_test_scaled)



array([[2.9344990e-05],
       [1.9166899e-05],
       [3.8269565e-05],
       [9.9856710e-01],
       [3.3085689e-05],
       [3.3716544e-05],
       [1.3292172e-04],
       [3.3906555e-05],
       [2.1948757e-05],
       [1.7136033e-05],
       [2.1948757e-05],
       [9.9864143e-01],
       [7.9022742e-05],
       [2.4703839e-05],
       [9.9828714e-01],
       [2.3665260e-05],
       [1.7421742e-05],
       [9.9871957e-01],
       [9.9822336e-01],
       [2.6380450e-05],
       [2.7971035e-05],
       [9.8922324e-01],
       [6.3808489e-01],
       [9.9871314e-01],
       [9.9851245e-01],
       [1.4245780e-05],
       [9.9861109e-01],
       [3.0353687e-05],
       [9.9836880e-01],
       [3.0659739e-05],
       [2.5673233e-05],
       [2.9820341e-05],
       [1.9550778e-05],
       [3.6018100e-05],
       [2.8169796e-05],
       [2.8107244e-05],
       [2.2549702e-05],
       [9.9323881e-01],
       [9.8849213e-01],
       [6.5934335e-05],
       [2.7264308e-05],
       [9.978856

In [25]:
nn_model.save('trained_model_president.h5')