## Recurrent Neural Networks

In [1]:
import keras
from keras.preprocessing import sequence
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding # Taking sequences of integers, and coming up as word vectors
from keras.layers import SimpleRNN
from keras.datasets import imdb
from keras import initializers

#### This dataset contains movie reviews from IMDB. The training and test sets each contain 25k reviews. Based on the words used in a review, we will predict whether its rating is positive or negative.

In [8]:
# Vocabulary size: only the `max_features` most common words are kept when the data is loaded.
max_features = 20_000
# Maximum sequence length: each review is cut down to this many words (here, 30).
maxlen = 30
# Number of samples per batch, passed to fit() and evaluate() below.
batch_size = 32

In [9]:
# Load the IMDB reviews through keras.datasets.imdb. The loader already
# tokenizes the text, so every review arrives as a sequence of integers,
# restricted to the `max_features` most common words.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
print(f"{len(X_train)} train sequences")
print(f"{len(X_test)} test sequences")

25000 train sequences
25000 test sequences


In [5]:
from keras.utils import pad_sequences

In [10]:
# Force every review to exactly `maxlen` tokens: shorter reviews are padded,
# longer ones are truncated.
X_train, X_test = (
    pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)
print('x_train shape:', X_train.shape)
print('x_test shape:', X_test.shape)

x_train shape: (25000, 30)
x_test shape: (25000, 30)


#### Let's look at an example

In [11]:
# Show one padded review (row 123) as its vector of integer word ids.
# The array is named X_train; a lowercase `x_train` is not defined by any
# cell above and raises NameError on a fresh kernel.
X_train[123, :]

array([  219,   141,    35,   221,   956,    54,    13,    16,    11,
        2714,    61,   322,   423,    12,    38,    76,    59,  1803,
          72,     8, 10508,    23,     5,   967,    12,    38,    85,
          62,   358,    99], dtype=int32)

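#### Each of the 30 numbers above is a word index, not a count: words are indexed by how frequently they occur in the dataset, so a smaller number (e.g. 5 or 8) stands for a more common word than a larger one (e.g. 10508)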

#### Now let's build an RNN

In [12]:
rnn_hidden_dim = 5       # number of hidden units in the single recurrent layer (not a number of layers); arbitrary choice
word_embedding_dim = 50  # every word becomes a vector of 50 numbers, so similar words (e.g. "hot" and "boiling") can end up with similar vectors

model_rnn = Sequential()

# Embedding layer: maps each integer word id in the sequence to a
# word_embedding_dim-dimensional vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# Recurrent layer. The original passed input_shape=x_train.shape[1:], but
# `x_train` (lowercase) is not defined by any cell above, so the cell fails on
# a fresh kernel. The argument is dropped rather than renamed: this is not the
# first layer of the Sequential model, so its input comes from the Embedding
# output.
model_rnn.add(SimpleRNN(rnn_hidden_dim,
                    kernel_initializer=initializers.RandomNormal(stddev=0.001),  # input weights start as small random values and are learned
                    recurrent_initializer=initializers.Identity(gain=1.0),       # state-to-state weights start as the identity matrix
                    activation='relu'))                                          # tanh is the other common choice

# Output layer: a single sigmoid unit for the binary positive/negative label.
model_rnn.add(Dense(1, activation='sigmoid'))

In [13]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts).
model_rnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 50)          1000000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 280       
                                                                 
 dense (Dense)               (None, 1)                 6         
                                                                 
Total params: 1,000,286
Trainable params: 1,000,286
Non-trainable params: 0
_________________________________________________________________


In [14]:
# RMSprop optimizer; the learning rate is a hyperparameter that can be tuned.
# Reference: https://keras.io/api/optimizers/rmsprop/
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)

# Binary cross-entropy loss for the two-class target; accuracy is tracked as a metric.
model_rnn.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [15]:
# Train for 20 epochs. Note that the test split is also used as the
# validation data that is reported after each epoch.
model_rnn.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=20,
    validation_data=(X_test, y_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f87d4151f10>

In [19]:
# Evaluate on the test split. The two returned values are the loss
# (binary cross-entropy) and the accuracy metric set in compile().
score, acc = model_rnn.evaluate(
    X_test,
    y_test,
    batch_size=batch_size,
)
print('Test score:', score)
print('Test accuracy:', acc)

Test score: 0.4580361843109131
Test accuracy: 0.7898399829864502


#### Let's now try a more complex model

In [25]:
# Second experiment: same vocabulary size, but keep 80 tokens per review.
max_features = 20_000
maxlen = 80

# Load the raw sequences again (X_train / X_test currently hold the arrays
# already cut to 30 tokens), then pad or truncate them to the new length.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
X_train, X_test = (
    pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)

In [27]:
# Confirm both splits now have 80 columns. The second label used to read
# 'X_train shape:' even though it prints the shape of X_test.
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

X_train shape: (25000, 80)
X_train shape: (25000, 80)


#### Build another RNN with the same architecture (only the word embedding dimension changes, from 50 to 20)

In [31]:
rnn_hidden_dim = 5       # number of hidden units in the recurrent layer
word_embedding_dim = 20  # each word id is embedded as a 20-number vector

model_rnn = Sequential()

# Embedding layer: maps each integer word id in the sequence to a
# word_embedding_dim-dimensional vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# Recurrent layer. The original passed input_shape=x_train.shape[1:], but
# `x_train` (lowercase) is not defined by any cell above, so the cell fails on
# a fresh kernel. The argument is dropped rather than renamed: this is not the
# first layer of the Sequential model, so its input comes from the Embedding
# output.
model_rnn.add(SimpleRNN(rnn_hidden_dim,
                    kernel_initializer=initializers.RandomNormal(stddev=0.001),
                    recurrent_initializer=initializers.Identity(gain=1.0),
                    activation='relu'))

# Output layer: a single sigmoid unit for the binary positive/negative label.
model_rnn.add(Dense(1, activation='sigmoid'))

In [32]:
# Fresh RMSprop optimizer for the new model, with the same learning rate as before.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)

model_rnn.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [33]:
# Train the 80-token model for 10 epochs; the test split again serves as
# the validation data.
model_rnn.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f87cddb2250>

In [34]:
# Second fit() call on the same model object: model_rnn is not rebuilt or
# recompiled between the two cells, so this presumably continues training
# from the previous weights for 10 more epochs (20 in total) - TODO confirm
# that this is the intent.
model_rnn.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f87b32559a0>

#### Conclusion: Given data on movie reviews, I was able to create an RNN model to predict whether it was a positive or negative review with 90% accuracy. This is a lot better than a random guess

In [55]:
# Run the trained model on the test reviews; the final layer is a single
# sigmoid unit, so each row of `pred` holds one value per review.
pred = model_rnn.predict(X_test)



In [63]:
# Look at the model output for the second test review (index 1).
pred[1]

array([0.9958018], dtype=float32)