In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import keras as kr


In [2]:
polling_df = pd.read_csv('../election_resources/cleaned_2019_data.csv')
polling_df.head()

Unnamed: 0,question_id,end_date,pollster,sample_size,population,politician,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results
0,114577,2019-12-10,YouGov,1195,rv,Michael F. Bennet,14.0,29.0,4.0,10.0,12.0,17.0,0
1,114585,2019-12-10,YouGov,1198,rv,Michael Bloomberg,24.0,58.0,7.0,17.0,20.0,38.0,0
2,114589,2019-12-10,YouGov,1198,rv,Cory A. Booker,35.0,45.0,11.0,24.0,12.0,33.0,0
3,114593,2019-12-10,YouGov,1198,rv,Steve Bullock,14.0,27.0,3.0,11.0,11.0,16.0,0
4,114597,2019-12-10,YouGov,1202,rv,Pete Buttigieg,35.0,44.0,14.0,21.0,13.0,31.0,0


In [3]:
polling_df.drop(columns=['question_id', 'end_date'], inplace=True)
polling_df.head()

Unnamed: 0,pollster,sample_size,population,politician,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results
0,YouGov,1195,rv,Michael F. Bennet,14.0,29.0,4.0,10.0,12.0,17.0,0
1,YouGov,1198,rv,Michael Bloomberg,24.0,58.0,7.0,17.0,20.0,38.0,0
2,YouGov,1198,rv,Cory A. Booker,35.0,45.0,11.0,24.0,12.0,33.0,0
3,YouGov,1198,rv,Steve Bullock,14.0,27.0,3.0,11.0,11.0,16.0,0
4,YouGov,1202,rv,Pete Buttigieg,35.0,44.0,14.0,21.0,13.0,31.0,0


In [4]:
polling_df.nunique()

pollster                 13
sample_size             288
population                1
politician               34
favorable                68
unfavorable              65
very_favorable           49
somewhat_favorable       43
somewhat_unfavorable     33
very_unfavorable         60
results                   2
dtype: int64

In [5]:
polling_df = pd.get_dummies(polling_df, dtype=float)
polling_df.head()

Unnamed: 0,sample_size,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results,pollster_ABC News/Washington Post,pollster_CNN/SSRS,...,politician_Pete Buttigieg,politician_Seth Moulton,politician_Sherrod Brown,politician_Stacey Yvonne Abrams,politician_Steve Bullock,politician_Terry R. McAuliffe,politician_Tim Ryan,politician_Tom Steyer,politician_Tulsi Gabbard,politician_Wayne Messam
0,1195,14.0,29.0,4.0,10.0,12.0,17.0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1198,24.0,58.0,7.0,17.0,20.0,38.0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1198,35.0,45.0,11.0,24.0,12.0,33.0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1198,14.0,27.0,3.0,11.0,11.0,16.0,0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,1202,35.0,44.0,14.0,21.0,13.0,31.0,0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
polling_df.fillna(0, inplace=True)

In [7]:
y = polling_df['results'].values
X = polling_df.drop(['results'], axis=1).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

In [25]:
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [26]:
X_scaled.shape

(1223, 55)

In [19]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid', 'leaky_relu', 'elu', 'selu'])
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
                                        min_value=1,
                                        max_value=100,
                                        step=2), activation=activation, input_dim=len(X_train_scaled[0])))
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
                                        min_value=1,
                                        max_value=100,
                                        step=2), activation=activation))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    loss = hp.Choice('loss', ['binary_crossentropy', 'mse'])
    optimizer = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adamax', 'nadam'])
    nn_model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])
    
    return nn_model

In [20]:
tuner = kt.Hyperband(create_model, objective='val_accuracy', max_epochs=20, hyperband_iterations=10, directory='my_dir2', project_name='intro_to_kt')

In [21]:
tuner.search(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 300 Complete [00h 00m 10s]
val_accuracy: 0.9975489974021912

Best val_accuracy So Far: 1.0
Total elapsed time: 00h 32m 52s
INFO:tensorflow:Oracle triggered exit


In [22]:
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 75,
 'num_layers': 8,
 'units_0': 15,
 'loss': 'mse',
 'optimizer': 'adam',
 'units_1': 67,
 'units_2': 37,
 'units_3': 61,
 'units_4': 5,
 'units_5': 29,
 'units_6': 83,
 'units_7': 13,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0}

In [23]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

13/13 - 0s - loss: 4.2026e-04 - accuracy: 1.0000 - 258ms/epoch - 20ms/step
Loss: 0.00042026155279017985, Accuracy: 1.0


In [29]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_scaled[0])
hidden_nodes_layer1 =  75
hidden_nodes_layer2 = 15
hidden_nodes_layer3 = 67
hidden_nodes_layer4 = 37
hidden_nodes_layer5 = 61
hidden_nodes_layer6 = 5
hidden_nodes_layer7 = 29
hidden_nodes_layer8 = 83


output_dim = 1



nn_model = tf.keras.models.Sequential()

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer7, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer8, activation="tanh"))





# Output layer
nn_model.add(tf.keras.layers.Dense(output_dim, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 75)                4200      
                                                                 
 dense_19 (Dense)            (None, 15)                1140      
                                                                 
 dense_20 (Dense)            (None, 67)                1072      
                                                                 
 dense_21 (Dense)            (None, 37)                2516      
                                                                 
 dense_22 (Dense)            (None, 61)                2318      
                                                                 
 dense_23 (Dense)            (None, 5)                 310       
                                                                 
 dense_24 (Dense)            (None, 29)               

In [30]:
nn_model.compile(loss="mse", optimizer="adam", metrics=["accuracy"])

In [12]:

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=100, initial_epoch= 0)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [13]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

13/13 - 0s - loss: 1.5562e-04 - accuracy: 1.0000 - 205ms/epoch - 16ms/step
Loss: 0.00015562385669909418, Accuracy: 1.0


In [14]:
nn_model.predict(X_test_scaled)



array([[4.1197333e-04],
       [4.6341945e-04],
       [1.3908128e-03],
       [4.0480919e-04],
       [3.9984228e-04],
       [4.3444408e-04],
       [4.1480301e-04],
       [4.1885694e-04],
       [4.8474417e-04],
       [4.2000666e-04],
       [4.1685175e-04],
       [4.2314368e-04],
       [9.9906045e-01],
       [4.0235251e-04],
       [4.0250769e-04],
       [5.1099213e-04],
       [4.1465749e-04],
       [3.9807984e-04],
       [9.9908471e-01],
       [9.9906939e-01],
       [4.2811740e-04],
       [4.4406214e-04],
       [4.0650394e-04],
       [3.9588058e-04],
       [5.5757444e-04],
       [4.2209524e-04],
       [4.3801405e-04],
       [9.9894220e-01],
       [3.9921503e-04],
       [4.9630716e-04],
       [4.9315824e-04],
       [4.0235903e-04],
       [4.0891208e-04],
       [4.1978111e-04],
       [4.1585334e-04],
       [4.4045885e-04],
       [4.0182602e-04],
       [9.9905723e-01],
       [4.0477928e-04],
       [4.0505914e-04],
       [4.0441117e-04],
       [4.250940

In [31]:
nn_model.save('trained_model_president.h5')