In [1]:
from tensorflow import keras
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import SimpleRNN
from keras.datasets import imdb
from keras import initializers

In [2]:
# Hyperparameters shared by the data-loading, padding and training cells below.
max_features = 20_000  # vocabulary size: passed as num_words and as the Embedding input_dim
maxlen = 30            # number of tokens each review is padded/truncated to
batch_size = 32        # samples per gradient update during fit/evaluate

In [3]:
# Load the IMDB dataset as sequences of integer word indices, restricting the
# vocabulary to `max_features` entries via `num_words`.
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_features,
)

In [4]:
# Number of samples in the training split.
len(X_train)

25000

In [5]:
# Number of samples in the test split.
len(X_test)

25000

In [10]:
# Bring every sequence to exactly `maxlen` tokens so the data forms a dense
# 2-D array. With pad_sequences' defaults, short sequences are zero-padded at
# the front and long ones keep only their last `maxlen` tokens.
X_train, X_test = (
    pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)

In [12]:
# Sanity check: both splits should now be (num_samples, maxlen).
for caption, split in (('x_train shape:', X_train), ('x_test shape:', X_test)):
    print(caption, split.shape)

x_train shape: (25000, 30)
x_test shape: (25000, 30)


## Keras layers for (Vanilla) RNNs

In this exercise, we will not use pre-trained word vectors.  Rather we will learn an embedding as part of the Neural Network.  This is represented by the Embedding Layer below.

### Embedding Layer
`keras.layers.Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, activity_regularizer=None, embeddings_constraint=None, mask_zero=False, input_length=None)`

- This layer maps each integer into a distinct (dense) word vector of length `output_dim`.
- Can think of this as learning a word vector embedding "on the fly" rather than using an existing mapping (like GloVe)
- The `input_dim` should be the size of the vocabulary.
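- The `input_length` specifies the length of the sequences that the network expects. (In Keras 3 this argument is deprecated and can be omitted; the code below does not pass it, and the sequence length is taken from the padded input instead.)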

### SimpleRNN Layer
`keras.layers.SimpleRNN(units, activation='tanh', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0)`

- This is the basic RNN, where the output is also fed back as the "hidden state" to the next iteration.
- The parameter `units` gives the dimensionality of the output (and therefore the hidden state).  Note that typically there will be another layer after the RNN mapping the (RNN) output to the network output.  So we should think of this value as the desired dimensionality of the hidden state and not necessarily the desired output of the network.
- Recall that there are two sets of weights, one for the "recurrent" phase and the other for the "kernel" phase.  These can be configured separately in terms of their initialization, regularization, etc.






In [13]:
# Model hyperparameters.
rnn_hidden_dim = 5        # dimensionality of the SimpleRNN hidden state / output
word_embedding_dim = 50   # length of each learned word vector

model_rnn = Sequential()

# Declare the input shape once, as the first element of the model. Passing
# `input_shape=` to SimpleRNN (a non-first layer) is ignored by Keras and only
# triggers a UserWarning; it also described the token-id input, not the RNN's.
model_rnn.add(keras.Input(shape=X_train.shape[1:], dtype='int32'))

# Map each integer word index in the sequence to a trainable
# `word_embedding_dim`-dimensional vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# ReLU RNN whose recurrent weights start as the identity matrix and whose
# input weights start as small Gaussian noise.
model_rnn.add(SimpleRNN(
    rnn_hidden_dim,
    kernel_initializer=initializers.RandomNormal(stddev=0.001),
    recurrent_initializer=initializers.Identity(gain=1.0),
    activation='relu',
))

# Single sigmoid unit for the binary label.
model_rnn.add(Dense(1, activation='sigmoid'))

  super().__init__(**kwargs)


In [14]:
# Print the layer-by-layer summary of the model built above.
model_rnn.summary()

In [15]:
# RMSprop with a small learning rate; binary cross-entropy pairs with the
# model's single sigmoid output, and accuracy is tracked for reporting.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model_rnn.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Train for 10 epochs. NOTE: the test split is also used as validation data,
# so the reported val_* metrics are not an independent held-out estimate.
model_rnn.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 23ms/step - accuracy: 0.5502 - loss: 0.6876 - val_accuracy: 0.6576 - val_loss: 0.6301
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.6843 - loss: 0.5973 - val_accuracy: 0.7121 - val_loss: 0.5541
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 23ms/step - accuracy: 0.7409 - loss: 0.5226 - val_accuracy: 0.7383 - val_loss: 0.5167
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 23ms/step - accuracy: 0.7782 - loss: 0.4739 - val_accuracy: 0.7538 - val_loss: 0.4956
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.7942 - loss: 0.4407 - val_accuracy: 0.7672 - val_loss: 0.4750
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 24ms/step - accuracy: 0.8105 - loss: 0.4200 - val_accuracy: 0.7716 - val_loss: 0.4689
Epoch 7/10
[1m7

<keras.src.callbacks.history.History at 0x1fde331c590>

In [18]:
# Final loss ("score") and accuracy on the test split; evaluate() returns them
# in the order [loss, *metrics] configured in compile().
score, acc = model_rnn.evaluate(x=X_test, y=y_test, batch_size=batch_size)
for caption, value in (('Test score:', score), ('Test accuracy:', acc)):
    print(caption, value)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.7837 - loss: 0.4551
Test score: 0.45259496569633484
Test accuracy: 0.7852799892425537


## Exercise

In this exercise, we will illustrate:
- Preparing the data to use sequences of length 80 rather than length 30.  Does it improve the performance?
- Trying different values of the "max_features".  Does this improve the performance?
- Trying smaller and larger sizes of the RNN hidden dimension.  How does it affect the model performance?  How does it affect the run time?


In [19]:
# Exercise, part 1: same vocabulary size, but 80 tokens per review instead of 30.
max_features = 20_000
maxlen = 80

# Reload the raw sequences first: X_train / X_test currently hold arrays that
# were already cut to the previous `maxlen`, so they cannot be re-padded to 80.
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_features,
)
X_train, X_test = (
    pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)

In [20]:
# Model hyperparameters. The original cell assigned `run_hidden_dim` (typo),
# so the SimpleRNN below silently depended on `rnn_hidden_dim` left over from
# an earlier cell; define the name that is actually used.
rnn_hidden_dim = 5        # dimensionality of the SimpleRNN hidden state / output
word_embedding_dim = 50   # length of each learned word vector

model_rnn = Sequential()

# Declare the input shape once, as the first element of the model. Passing
# `input_shape=` to SimpleRNN (a non-first layer) is ignored by Keras and only
# triggers a UserWarning.
model_rnn.add(keras.Input(shape=X_train.shape[1:], dtype='int32'))

# Map each integer word index to a trainable `word_embedding_dim`-dimensional vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# ReLU RNN whose recurrent weights start as the identity matrix and whose
# input weights start as small Gaussian noise.
model_rnn.add(SimpleRNN(
    rnn_hidden_dim,
    kernel_initializer=initializers.RandomNormal(stddev=0.001),
    recurrent_initializer=initializers.Identity(gain=1.0),
    activation='relu',
))

# Single sigmoid unit for the binary label.
model_rnn.add(Dense(1, activation='sigmoid'))

  super().__init__(**kwargs)


In [21]:
# Fresh optimizer for the freshly built model; same settings as the first run
# so the results stay comparable.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model_rnn.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [22]:
# Train on the 80-token sequences for 10 epochs. NOTE: the test split is also
# used as validation data, so val_* metrics are not an independent estimate.
model_rnn.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 40ms/step - accuracy: 0.5544 - loss: 0.6850 - val_accuracy: 0.6790 - val_loss: 0.6058
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 50ms/step - accuracy: 0.7173 - loss: 0.5606 - val_accuracy: 0.7492 - val_loss: 0.5126
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 46ms/step - accuracy: 0.7797 - loss: 0.4721 - val_accuracy: 0.7745 - val_loss: 0.4738
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 45ms/step - accuracy: 0.8147 - loss: 0.4098 - val_accuracy: 0.7636 - val_loss: 0.4824
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 46ms/step - accuracy: 0.8442 - loss: 0.3600 - val_accuracy: 0.8075 - val_loss: 0.4178
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 40ms/step - accuracy: 0.8559 - loss: 0.3368 - val_accuracy: 0.8178 - val_loss: 0.4026
Epoch 7/10
[1m7

<keras.src.callbacks.history.History at 0x1fde331c980>

In [23]:
# Exercise, part 2: shrink the vocabulary to 5000 words, keeping 80 tokens per review.
max_features = 5_000
maxlen = 80

# Reload so the word indices are re-capped at the new vocabulary size.
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    num_words=max_features,
)


In [24]:
# Pad/truncate every sequence to exactly `maxlen` tokens (pad_sequences'
# defaults pad and truncate at the front of the sequence).
X_train, X_test = (
    pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)

In [25]:
# Model hyperparameters. The original cell assigned `run_hidden_dim` (typo),
# so the SimpleRNN below silently depended on `rnn_hidden_dim` left over from
# an earlier cell; define the name that is actually used.
rnn_hidden_dim = 5        # dimensionality of the SimpleRNN hidden state / output
word_embedding_dim = 20   # smaller word vectors to go with the smaller vocabulary

model_rnn = Sequential()

# Declare the input shape once, as the first element of the model. Passing
# `input_shape=` to SimpleRNN (a non-first layer) is ignored by Keras and only
# triggers a UserWarning.
model_rnn.add(keras.Input(shape=X_train.shape[1:], dtype='int32'))

# Map each integer word index to a trainable `word_embedding_dim`-dimensional vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# ReLU RNN whose recurrent weights start as the identity matrix and whose
# input weights start as small Gaussian noise.
model_rnn.add(SimpleRNN(
    rnn_hidden_dim,
    kernel_initializer=initializers.RandomNormal(stddev=0.001),
    recurrent_initializer=initializers.Identity(gain=1.0),
    activation='relu',
))

# Single sigmoid unit for the binary label.
model_rnn.add(Dense(1, activation='sigmoid'))

  super().__init__(**kwargs)


In [26]:
# Fresh optimizer for the freshly built model; same settings as the earlier
# runs so the results stay comparable.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)
model_rnn.compile(
    optimizer=rmsprop,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [27]:
# First 10 epochs on the 5000-word vocabulary. NOTE: the test split is also
# used as validation data, so val_* metrics are not an independent estimate.
model_rnn.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 32ms/step - accuracy: 0.5050 - loss: 0.6923 - val_accuracy: 0.5532 - val_loss: 0.6759
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 25ms/step - accuracy: 0.6451 - loss: 0.6307 - val_accuracy: 0.7283 - val_loss: 0.5337
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 28ms/step - accuracy: 0.7588 - loss: 0.5027 - val_accuracy: 0.7683 - val_loss: 0.4778
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 28ms/step - accuracy: 0.7927 - loss: 0.4458 - val_accuracy: 0.7772 - val_loss: 0.4630
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 31ms/step - accuracy: 0.8115 - loss: 0.4100 - val_accuracy: 0.7894 - val_loss: 0.4419
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 28ms/step - accuracy: 0.8359 - loss: 0.3790 - val_accuracy: 0.8113 - val_loss: 0.4087
Epoch 7/10
[1m7

<keras.src.callbacks.history.History at 0x1fdd8ac5b50>

In [28]:
# Calling fit() again on the same model continues training from its current
# weights, so this adds 10 more epochs on top of the previous 10 (the epoch
# counter in the log restarts at 1).
model_rnn.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 27ms/step - accuracy: 0.8645 - loss: 0.3137 - val_accuracy: 0.8328 - val_loss: 0.3731
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 27ms/step - accuracy: 0.8706 - loss: 0.3075 - val_accuracy: 0.8372 - val_loss: 0.3657
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 25ms/step - accuracy: 0.8713 - loss: 0.3023 - val_accuracy: 0.8292 - val_loss: 0.3822
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 25ms/step - accuracy: 0.8779 - loss: 0.2976 - val_accuracy: 0.8374 - val_loss: 0.3654
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 25ms/step - accuracy: 0.8789 - loss: 0.2905 - val_accuracy: 0.8408 - val_loss: 0.3605
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 25ms/step - accuracy: 0.8818 - loss: 0.2894 - val_accuracy: 0.8409 - val_loss: 0.3615
Epoch 7/10
[1m7

<keras.src.callbacks.history.History at 0x1fdd437c650>