In [1]:
import re
import pandas as pd
import numpy as np

from tensorflow import keras
from keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [None]:
# Read the preprocessed congressional-tweet table into a DataFrame.
csv_path = '../preprocessed_congressional_tweet.csv'
data = pd.read_csv(csv_path)

# Show the first ten rows as the cell output for a quick visual check.
data.head(n=10)

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Map every distinct party_id value to an integer class index.
labelencoder = LabelEncoder()
labelencoder.fit(data['party_id'])
integer_encoded = labelencoder.transform(data['party_id'])

# Expand the integer indices into one-hot rows; Y is the target matrix
# that the later split/training cells consume.
Y = to_categorical(integer_encoded)

In [None]:
# Display the first one-hot encoded label row as a sanity check.
Y[0]

In [None]:
# Raw table as an array; kept so any later cell that reads `data_array` still works.
data_array = data.values

# Build the feature matrix by dropping the label column *by name*.
# The labels (Y) were derived from data['party_id'], so selecting features
# positionally with [:, :-1] would silently leak the label into X (and drop a
# real feature) whenever 'party_id' is not the last column of the CSV.
features = data.drop(columns=['party_id']).values

# Hold out 25% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(features, Y, test_size=0.25, random_state=42)

# Cast features to float32 before feeding them to the network.
# NOTE(review): this assumes every remaining column is numeric - confirm against the CSV.
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

print(X_train.shape)

In [None]:
# Feed-forward classifier: two tanh hidden layers, each followed by dropout,
# and a softmax output layer.
model = Sequential([
    # Input width is taken from the training matrix so it follows the data.
    Dense(512, activation='tanh', input_dim=X_train.shape[1]),
    Dropout(0.5),
    Dense(256, activation='tanh'),
    Dropout(0.25),
    # Output width follows the one-hot label matrix instead of a hard-coded 2,
    # so the model still matches Y if the label column has more than two
    # distinct values. For two classes this is identical to Dense(2, ...).
    Dense(Y_train.shape[1], activation='softmax'),
])

# categorical_crossentropy pairs with the one-hot targets and softmax output.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Import callbacks from keras
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# Destination for the best model seen so far. The `.keras` suffix is required:
# Keras 3's ModelCheckpoint raises ValueError for a full-model path that does
# not end in `.keras` (or `.h5`), and older tf.keras versions accept it too.
file_path = '/tmp/checkpoint.keras'

# Save the model only when validation accuracy improves.
checkpoint = ModelCheckpoint(file_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')

# Cut the learning rate by 10x after 5 epochs without improvement in *training* loss.
# NOTE(review): this watches 'loss' while early stopping watches 'val_loss' with a
# shorter patience (3), so training may stop before this ever fires - confirm intent.
reduce_on_plateau = ReduceLROnPlateau(monitor="loss", mode="min", factor=0.1, patience=5, verbose=1)

# Stop after 3 epochs without validation-loss improvement and roll back to the best weights.
es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

callbacks_list = [checkpoint, reduce_on_plateau, es]

In [None]:
# Train the model. The validation metrics drive checkpointing and early
# stopping, so they must not come from the test set: using (X_test, Y_test)
# here would tune the stopping point on the very data later reported as the
# test score. Instead, Keras holds out the last 20% of the training arrays
# (taken before shuffling) as the validation set.
history = model.fit(
    X_train,
    Y_train,
    batch_size=512,
    epochs=20,
    callbacks=callbacks_list,
    verbose=1,
    validation_split=0.2,
)

In [None]:
# Score the trained model on the held-out split; with the compiled metrics
# the result is indexed as [loss, accuracy].
scores = model.evaluate(X_test, Y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]

print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Spot-check a single test row: compare the predicted class index with the true one.
i = 3

# Keep a leading batch axis of size 1 so predict() receives a 2-D input.
predict_array = model.predict(np.expand_dims(X_test[i], axis=0))

# The position of the largest softmax output is the predicted class index;
# the position of the 1 in the one-hot target row is the true class index.
prediction = predict_array.argmax()
truth = Y_test[i].argmax()

print(f"Prediction: {prediction}, Truth: {truth}")