In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt
import keras as kr


In [7]:
biden_v_trump = pd.read_csv('../election_resources/biden_v_trump.csv')
biden_v_trump.head()

Unnamed: 0,question_id,end_date,pollster,sample_size,population,politician,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results
0,114581,2019-12-10,YouGov,1204,rv,Joseph R. Biden Jr.,42.0,52.0,20.0,22.0,14.0,38.0,1
1,114524,2019-12-09,Quinnipiac University,1553,rv,Joseph R. Biden Jr.,44.0,47.0,0.0,0.0,0.0,0.0,1
2,114382,2019-12-08,Monmouth University,838,rv,Joseph R. Biden Jr.,43.0,50.0,18.0,25.0,16.0,34.0,1
3,114188,2019-12-03,YouGov,1198,rv,Joseph R. Biden Jr.,42.0,51.0,20.0,22.0,14.0,37.0,1
4,114336,2019-11-29,Harris Poll,1859,rv,Joseph R. Biden Jr.,44.0,44.0,15.0,29.0,19.0,25.0,1


In [9]:
biden_v_trump.head()

Unnamed: 0,question_id,end_date,pollster,sample_size,population,politician,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results,favorability
0,114581,2019-12-10,YouGov,1204,rv,Joseph R. Biden Jr.,42.0,52.0,20.0,22.0,14.0,38.0,1,-22.0006
1,114524,2019-12-09,Quinnipiac University,1553,rv,Joseph R. Biden Jr.,44.0,47.0,0.0,0.0,0.0,0.0,1,-2.0001
2,114382,2019-12-08,Monmouth University,838,rv,Joseph R. Biden Jr.,43.0,50.0,18.0,25.0,16.0,34.0,1,-17.6672
3,114188,2019-12-03,YouGov,1198,rv,Joseph R. Biden Jr.,42.0,51.0,20.0,22.0,14.0,37.0,1,-20.3339
4,114336,2019-11-29,Harris Poll,1859,rv,Joseph R. Biden Jr.,44.0,44.0,15.0,29.0,19.0,25.0,1,-6.667


In [10]:
biden_v_trump.columns

Index(['question_id', 'end_date', 'pollster', 'sample_size', 'population',
       'politician', 'favorable', 'unfavorable', 'very_favorable',
       'somewhat_favorable', 'somewhat_unfavorable', 'very_unfavorable',
       'results', 'favorability'],
      dtype='object')

In [4]:
biden_v_trump.nunique()

pollster                 13
sample_size             112
population                1
politician                2
favorable                21
unfavorable              30
very_favorable           25
somewhat_favorable       26
somewhat_unfavorable     21
very_unfavorable         35
results                   2
dtype: int64

In [7]:
biden_v_trump = pd.get_dummies(biden_v_trump, dtype=float)
biden_v_trump.head()

Unnamed: 0,sample_size,favorable,unfavorable,very_favorable,somewhat_favorable,somewhat_unfavorable,very_unfavorable,results,biden_score,trump_score,...,pollster_Monmouth University,pollster_Morning Consult,pollster_NBC News/Wall Street Journal,pollster_Public Policy Polling,pollster_Quinnipiac University,pollster_Suffolk University,pollster_YouGov,population_rv,politician_Donald Trump,politician_Joseph R. Biden Jr.
0,1204,28.0014,-34.6684,20.0,7.3326,-4.6662,-38.0,1,-22.0006,-22.0006,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
1,1553,29.3348,-31.3349,0.0,0.0,-0.0,-0.0,1,-2.0001,-2.0001,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
2,838,28.6681,-33.335,18.0,8.3325,-5.3328,-34.0,1,-17.6672,-17.6672,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
3,1198,28.0014,-34.0017,20.0,7.3326,-4.6662,-37.0,1,-20.3339,-20.3339,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
4,1859,29.3348,-29.3348,15.0,9.6657,-6.3327,-25.0,1,-6.667,-6.667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0


In [8]:
biden_v_trump.fillna(0, inplace=True)

In [9]:
biden_v_trump.shape

(230, 26)

In [10]:
y = biden_v_trump['results'].values
X = biden_v_trump.drop(['results'], axis=1).values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=21)

In [11]:
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [45]:
X_scaled.shape

(172, 25)

In [12]:
def create_model(hp):
    nn_model = tf.keras.models.Sequential()
    activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid', 'leaky_relu', 'elu', 'selu'])
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
                                        min_value=1,
                                        max_value=75,
                                        step=2), activation=activation, input_dim=len(X_scaled[0])))
    for i in range(hp.Int('num_layers', 1, 10)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
                                        min_value=1,
                                        max_value=75,
                                        step=2), activation=activation))
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    loss = hp.Choice('loss', ['binary_crossentropy', 'mse'])
    optimizer = hp.Choice('optimizer', ['adam', 'rmsprop', 'sgd', 'adagrad', 'adadelta', 'adamax', 'nadam'])
    nn_model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])
    
    return nn_model

In [13]:
tuner = kt.Hyperband(create_model, objective='val_accuracy', max_epochs=20, hyperband_iterations=10, directory='my_dir2', project_name='intro_to_kt')

In [14]:
tuner.search(X_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test))

Trial 300 Complete [00h 00m 02s]
val_accuracy: 0.931034505367279

Best val_accuracy So Far: 1.0
Total elapsed time: 00h 07m 50s
INFO:tensorflow:Oracle triggered exit


In [15]:
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'tanh',
 'first_units': 71,
 'num_layers': 9,
 'units_0': 73,
 'loss': 'binary_crossentropy',
 'optimizer': 'nadam',
 'units_1': 23,
 'units_2': 19,
 'units_3': 65,
 'units_4': 19,
 'units_5': 55,
 'units_6': 69,
 'units_7': 3,
 'units_8': 9,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0}

In [16]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: 0.2652 - accuracy: 1.0000 - 189ms/epoch - 95ms/step
Loss: 0.26523008942604065, Accuracy: 1.0


In [26]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_scaled[0])
hidden_nodes_layer1 =  71
hidden_nodes_layer2 = 73
hidden_nodes_layer3 = 23
hidden_nodes_layer4 = 19
hidden_nodes_layer5 = 65
hidden_nodes_layer6 = 55
hidden_nodes_layer7 = 69
hidden_nodes_layer8 = 3
hidden_nodes_layer9 = 9


output_dim = 1



nn_model = tf.keras.models.Sequential()

# First hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"))

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer4, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer5, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer6, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer7, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer8, activation="tanh"))
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_layer9, activation="tanh"))






# Output layer
nn_model.add(tf.keras.layers.Dense(output_dim, activation="sigmoid"))

# Check the structure of the model
nn_model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_21 (Dense)            (None, 71)                1846      
                                                                 
 dense_22 (Dense)            (None, 73)                5256      
                                                                 
 dense_23 (Dense)            (None, 23)                1702      
                                                                 
 dense_24 (Dense)            (None, 19)                456       
                                                                 
 dense_25 (Dense)            (None, 65)                1300      
                                                                 
 dense_26 (Dense)            (None, 55)                3630      
                                                                 
 dense_27 (Dense)            (None, 69)               

In [27]:
nn_model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=["accuracy"])

In [28]:

# Train the model
fit_model = nn_model.fit(X_scaled, y_train, epochs=20, initial_epoch= 0)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [29]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

2/2 - 0s - loss: 0.1054 - accuracy: 1.0000 - 196ms/epoch - 98ms/step
Loss: 0.10541243106126785, Accuracy: 1.0


In [30]:
predictions = nn_model.predict(X_test_scaled)



In [None]:
y_te

In [33]:
# Calculating the confusion matrix
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(
    cm, index=["Actual 0", "Actual 1"], columns=["Predicted 0", "Predicted 1"]
)

# Calculating the accuracy score
acc_score = accuracy_score(y_test, predictions)

ValueError: Classification metrics can't handle a mix of binary and continuous targets

In [29]:
nn_model.save('trained_model_president.h5')