In [1]:
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, Dense
from tensorflow.keras.layers import SimpleRNN as RNN

In [64]:
# Size of the Embedding vocabulary (first argument of every Embedding layer below).
# Number of individual tokens plus 1: the extra slot is id 0, which create_wordmap
# reserves for the " " padding entry.
# NOTE(review): this is hard-coded, so it has to be kept in sync with the wordmap by hand.
vocab_size = 3576  # Number of individual tokens plus 1
# Dimensionality of each learned token vector (second argument of every Embedding layer).
embedding_vector_length = 80
# Presumably the maximum number of tokens per review.
# NOTE(review): the X_train.shape output further down shows 34 columns, not 80 -- confirm
# which value is intended.
wordmax = 80

In [73]:
from copy import deepcopy
from sklearn.model_selection import train_test_split
from collections import OrderedDict

# Preprocess Data
def preprocess_data():
    """Load the Amazon sentiment file, encode the sentences and split train/test.

    The tab-delimited file is read as strings; column 0 holds the sentence and
    column 1 the label, which is converted to float. A token -> id map is built
    from all sentences, the sentences are encoded into a zero-padded id matrix,
    and the result is split with a fixed random_state so the split is repeatable.

    Returns:
        The list produced by train_test_split, unpacked by the caller as
        (X_train, X_test, y_train, y_test), with 33% of the rows in the test part.
    """
    text = np.genfromtxt('sentiment_labelled_sentences/amazon_cells_labelled.txt', dtype='str', delimiter='\t')
    # The builtin float replaces the np.float alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24 (it was only ever an alias for float).
    target = text[:, 1].astype(float)
    features = text[:, 0]
    wordmap = create_wordmap(features)
    encoded_features = encode_text(features, wordmap)
    return train_test_split(encoded_features, target, test_size=0.33, random_state=2910)


def encode_text(corpus, wordmap):
    """Tokenize every review in `corpus` and encode it as a padded id matrix.

    Args:
        corpus: Array-like of review strings exposing ``tolist()``; it is
            copied first, so the caller's object is not touched here.
        wordmap: Mapping from token to integer id.

    Returns:
        The matrix built by fill_matrix: one row per review, token ids on the
        left and zeros filling the remainder of shorter rows.
    """
    tokenized_reviews = [
        space_punctuation(review) for review in deepcopy(corpus).tolist()
    ]

    # Zero-pad so every row has the same number of columns.
    return fill_matrix(tokenized_reviews, wordmap)


def decode_text(numerical_output, wordmap):
    """Translate a sequence of token ids back into their tokens.

    Args:
        numerical_output: One-dimensional iterable of token ids (ints, or
            floats with integral values such as a row of the encoded matrix).
        wordmap: Mapping from token to id, as produced by create_wordmap.

    Returns:
        A new list with the token for each id, in the same order. The input
        is left untouched.

    Raises:
        KeyError: If an id has no entry in `wordmap`.
    """
    reverse_wordmap = {token_id: token for token, token_id in wordmap.items()}
    # Build a fresh list instead of writing strings into a copy of the input:
    # a numeric NumPy array cannot hold the decoded strings, and the previous
    # version never returned its result.
    return [reverse_wordmap[number] for number in numerical_output]


def create_wordmap(text):
    """Build a token -> integer id mapping from an iterable of strings.

    All strings are joined with spaces and tokenized with space_punctuation.
    Each distinct token gets an id starting at 1; id 0 is reserved for the
    " " entry, which matches the zeros used as padding in the encoded matrix.

    Args:
        text: Iterable of strings (e.g. the array of reviews).

    Returns:
        dict mapping each token to its id, plus the " " -> 0 entry.
    """
    tokens = space_punctuation(" ".join(text))

    # Sort the vocabulary before numbering it: iterating a raw set of strings
    # gives a different order from one interpreter run to the next (string
    # hashing is randomized), which would make the ids non-reproducible.
    vocabulary = sorted(set(tokens))

    # Reserve zero for spaces / padding, so real tokens start at 1.
    wordmap = {word: idx for idx, word in enumerate(vocabulary, start=1)}
    wordmap[" "] = 0

    return wordmap


def fill_matrix(datalist, wordmap):
    """Encode tokenized rows as a zero-padded 2-D float array of token ids.

    Args:
        datalist: Sequence of token sequences (one per row).
        wordmap: Mapping from token to numeric id.

    Returns:
        Array of shape (len(datalist), length of the longest row). Each row
        holds that row's ids from column 0 onward and zeros afterwards. An
        empty `datalist` yields an array of shape (0, 0).

    Raises:
        KeyError: If a token is missing from `wordmap`.
    """
    # max() of an empty sequence raises, so handle "no rows" explicitly.
    if len(datalist) == 0:
        return np.zeros([0, 0])

    width = max(len(tokens) for tokens in datalist)
    matrix = np.zeros([len(datalist), width])
    for row, tokens in enumerate(datalist):
        # Encode into a new list so the caller's token lists are not
        # overwritten with ids.
        matrix[row, :len(tokens)] = [wordmap[token] for token in tokens]
    return matrix


def space_punctuation(text):
    """Split `text` into tokens, treating , ! ? and . as separate tokens.

    Each of the four marks is surrounded with spaces before the string is
    split on whitespace, so punctuation attached to a word becomes its own
    list entry.

    Returns:
        list of token strings.
    """
    spaced = text
    for mark in (",", "!", "?", "."):
        spaced = spaced.replace(mark, " " + mark + " ")
    return spaced.split()

# Run the full pipeline once: load the file, encode the reviews and split them.
# The four names below are used by every model cell that follows.
X_train, X_test, y_train, y_test = preprocess_data()


In [74]:
# Sanity check: (number of training reviews, padded tokens per review).
X_train.shape

(669, 34)

In [83]:
# Vanilla RNN Network
# Embedding -> SimpleRNN(100) -> one sigmoid unit, trained with binary cross-entropy.
# input_length is read from the encoded data instead of the hard-coded `wordmax`:
# the padded matrix is only as wide as the longest review, so declaring 80 here
# did not match the array that is actually passed to fit().
vanilla_rnn = Sequential()
vanilla_rnn.add(Embedding(vocab_size, embedding_vector_length, input_length=X_train.shape[1]))
vanilla_rnn.add(RNN(100))
vanilla_rnn.add(Dense(1, activation='sigmoid'))
vanilla_rnn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [85]:
# Train the vanilla RNN. The held-out split is passed as validation data, so the
# per-epoch validation metrics are computed on the same rows used for the final test.
# NOTE(review): the recorded output below lists 10 epochs while this call asks for 20,
# so that output comes from an earlier run of this cell.
history = vanilla_rnn.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=256,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [86]:
# Score the vanilla RNN on the held-out split. evaluate() returns the loss followed by
# the metrics given to compile(), so index 1 is the accuracy.
scores = vanilla_rnn.evaluate(
    X_test,
    y_test,
    verbose=0,
)
print('Test accuracy:', scores[1])

Test accuracy: 0.7181817889213562


In [108]:
# LSTM Network
# Same layout as the vanilla RNN, with an LSTM(100) recurrent layer.
from tensorflow.keras.layers import LSTM

# input_length is read from the encoded data instead of the hard-coded `wordmax`:
# the padded matrix is only as wide as the longest review, so declaring 80 here
# did not match the array that is actually passed to fit().
lstm = Sequential()
lstm.add(Embedding(vocab_size, embedding_vector_length, input_length=X_train.shape[1]))
lstm.add(LSTM(100))
lstm.add(Dense(1, activation='sigmoid'))
lstm.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [109]:
# Train the LSTM with the same settings as the vanilla RNN; the held-out split is
# again used as validation data.
history2 = lstm.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=256,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [110]:
# GRU Network
# Same layout as the other two models, with a GRU recurrent layer of 128 units.
from tensorflow.keras.layers import GRU

# input_length is read from the encoded data instead of the hard-coded `wordmax`:
# the padded matrix is only as wide as the longest review, so declaring 80 here
# did not match the array that is actually passed to fit().
gru = Sequential()
gru.add(Embedding(vocab_size, embedding_vector_length, input_length=X_train.shape[1]))
gru.add(GRU(128))
gru.add(Dense(1, activation='sigmoid'))
gru.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [111]:
# Train the GRU with the same settings as the other models; the held-out split is
# again used as validation data.
history3 = gru.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=256,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
