# Modeling Voter Turnout Behavior with Keras

In [1]:
import pandas as pd
import numpy as np
import pickle
import altair as alt
from sklearn.preprocessing import StandardScaler
from cats import dummify
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from modeling_functions import aggregate_samples, aggregate_errors, plot_agg_error
from texter import send

In [2]:
# Load the pickled `homes` table and the saved split indices.
# Context managers guarantee each handle is closed even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# point these paths at files produced by this project.
with open('home_votes', 'rb') as homesf:
    homes = pickle.load(homesf)
with open('split_indeces', 'rb') as test_indicesf:
    split_indices = pickle.load(test_indicesf)

In [3]:
# One-hot encode the columns listed in `dummify`, then discard every column
# whose name contains '_0'.
# NOTE(review): the '_0' check is a substring match over all column names,
# not only the freshly created dummy columns — confirm nothing else is caught.
homes = pd.get_dummies(homes, columns=dummify)
extra_cats = list(filter(lambda name: '_0' in name, homes.columns))
homes = homes.drop(columns=extra_cats)

In [4]:
# Columns excluded from the feature matrix: the three targets plus 'pin'.
dropcols = ['v19pu', 'v20pu', 'vpu', 'pin']

# Rows whose index label appears in split_indices[2] form the training set;
# every other row goes to the test set. The mask is computed once and reused
# so features and all targets are guaranteed to be sliced identically.
train_mask = homes.index.isin(split_indices[2])

X_train = homes.loc[train_mask].drop(columns=dropcols)
X_test = homes.loc[~train_mask].drop(columns=dropcols)

# One target vector per modelled quantity, split with the same mask.
y_train_rv = homes.loc[train_mask, 'vpu']
y_train_19 = homes.loc[train_mask, 'v19pu']
y_train_20 = homes.loc[train_mask, 'v20pu']
y_test_rv = homes.loc[~train_mask, 'vpu']
y_test_19 = homes.loc[~train_mask, 'v19pu']
y_test_20 = homes.loc[~train_mask, 'v20pu']

In [5]:
# Standardise the features: the scaler is fitted on the training rows only,
# and that same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [7]:
# Bundle the current feature matrices and all six target vectors into one
# tuple and pickle it to the file 'nnsplits'.
split = (
    X_train, X_test,
    y_train_rv, y_test_rv,
    y_train_19, y_test_19,
    y_train_20, y_test_20,
)
with open('nnsplits', 'wb') as splits_file:
    pickle.dump(split, splits_file)

In [None]:
#tests = aggregate_samples(pd.DataFrame(X_test_pca), 100, 1000)

In [13]:
# Build and compile the RV regression network (no training happens in this
# cell): four ReLU hidden layers narrowing 512 -> 256 -> 128 -> 64, followed
# by a single linear output unit, optimised with Adam on mean squared error.
# Dropout(.4) after each of the first two hidden layers is currently disabled.
model_rv = Sequential([
    Dense(512, activation="relu"),
    Dense(256, activation="relu"),
    Dense(128, activation="relu"),
    Dense(64, activation="relu"),
    Dense(1),
])
model_rv.compile(optimizer="adam", loss="mean_squared_error")

In [12]:
# Fit the RV network on the training split for up to 10 epochs, with 20% of
# the supplied rows held out for validation.
history_rv = model_rv.fit(
    np.asarray(X_train, dtype='float32'),
    np.asarray(y_train_rv),
    validation_split=.2,
    epochs=10,
)

Train on 572175 samples, validate on 143044 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10


KeyboardInterrupt: 

In [8]:
# Second fit call on the same model_rv object: up to 15 epochs, again with
# 20% of the supplied rows held out for validation.
history_rv2 = model_rv.fit(
    np.asarray(X_train, dtype='float32'),
    np.asarray(y_train_rv),
    validation_split=.2,
    epochs=15,
)

Train on 572175 samples, validate on 143044 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15

KeyboardInterrupt: 

In [24]:
# Run the RV network over the test features (cast to float32 first).
test_features_rv = np.asarray(X_test, dtype='float32')
rv_pred = model_rv.predict(test_features_rv)

In [25]:
# Total of the network's test-set predictions; compare with y_test_rv.sum().
rv_pred.sum()

359966.3

In [26]:
# Total of the actual 'vpu' targets over the test rows; compare with rv_pred.sum().
y_test_rv.sum()

347631.4478521943

In [11]:
# Save the RV model to the path 'nnregrv'.
model_rv.save('nnregrv')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: nnregrv/assets


In [12]:
# Signal that the RV stage has finished via texter.send
# (presumably sends a text notification — confirm in the texter module).
send('RV Network Done')

## 2019 Voters

In [110]:
# Build and compile the network that is fit against y_train_19:
# Dense(100, relu) -> Dropout(0.6) -> Dense(50, relu) -> one linear output,
# optimised with Adam on mean squared error.
# The input width is read from X_train instead of being hard-coded (it was
# 431), so the model keeps matching the feature matrix if the set of encoded
# columns changes.
model_19 = Sequential()
model_19.add(Dense(100, input_dim=X_train.shape[1], activation="relu"))
model_19.add(Dropout(.6))
model_19.add(Dense(50, activation="relu"))
model_19.add(Dense(1))
model_19.compile(
    loss="mean_squared_error", optimizer="adam"
)



In [111]:
# Fit the 2019 network on the training split for up to 70 epochs, with 20% of
# the supplied rows held out for validation.
history19 = model_19.fit(
    np.asarray(X_train, dtype='float32'),
    np.asarray(y_train_19),
    validation_split=.2,
    epochs=70,
)

Train on 572175 samples, validate on 143044 samples
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70

KeyboardInterrupt: 

In [None]:
# Save the 2019 model to the path 'nnreg19'.
model_19.save('nnreg19')

In [None]:
# Signal that the 2019 stage has finished via texter.send
# (presumably sends a text notification — confirm in the texter module).
send('2019 Network Done')

## 2020 Voters

In [35]:
# Build and compile the network that is fit against y_train_20 (training
# happens in the next cell):
# Dense(500, relu) -> Dropout(0.3) -> Dense(100, relu) -> Dense(50, relu)
# -> one linear output, optimised with Adam on mean squared error.
# The input width is read from X_train instead of being hard-coded (it was
# 431), so the model keeps matching the feature matrix if the set of encoded
# columns changes.
model_20 = Sequential()
model_20.add(Dense(500, input_dim=X_train.shape[1], activation="relu"))
model_20.add(Dropout(.3))
model_20.add(Dense(100, activation="relu"))
model_20.add(Dense(50, activation="relu"))
model_20.add(Dense(1))
model_20.compile(
    loss="mean_squared_error", optimizer="adam"
)

In [36]:
# Fit the 2020 network on the training split for up to 10 epochs, with 20% of
# the supplied rows held out for validation.
history20 = model_20.fit(
    np.asarray(X_train, dtype='float32'),
    np.asarray(y_train_20),
    validation_split=.2,
    epochs=10,
)

Train on 572175 samples, validate on 143044 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
106752/572175 [====>.........................] - ETA: 2:26 - loss: 0.4504

KeyboardInterrupt: 

In [None]:
# Save the 2020 model to the path 'nnreg20'.
model_20.save('nnreg20')

In [None]:
# Signal that the 2020 stage has finished via texter.send
# (presumably sends a text notification — confirm in the texter module).
send('2020 Network Done')