In [None]:
from tensorflow import keras
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Embedding
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.datasets import imdb
from tensorflow.keras import initializers

In [None]:
# Configuration for the first experiment.
max_features = 20000  # vocabulary size: only the `max_features` most frequent words are kept when loading the data
maxlen = 30           # number of tokens kept per review (shorter reviews are padded, longer ones truncated)
batch_size = 32       # mini-batch size used for training and evaluation

# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation and batch shuffling are repeatable after a kernel restart.
# NOTE(review): requires TF >= 2.7; some GPU kernels may still be non-deterministic.
SEED = 42
keras.utils.set_random_seed(SEED)

In [None]:
# Load the IMDB reviews as lists of integer word indices, restricted to the
# `max_features` most frequent words, together with their 0/1 labels.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=max_features,
)

# Report how many reviews ended up in each split.
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 train sequences
25000 test sequences


In [None]:
# Bring every review to exactly `maxlen` tokens. pad_sequences is called with
# its default `padding`/`truncating` modes ("pre" per the Keras docs), i.e.
# short reviews are zero-padded at the front and long ones lose their first tokens.
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_test)
)

# Both splits are now 2-D integer arrays of shape (num_reviews, maxlen).
print("x_train shape: ", x_train.shape)
print("x_test shape: ", x_test.shape)

x_train shape:  (25000, 30)
x_test shape:  (25000, 30)


In [None]:
# Example of one encoded review: a fixed-length array of integer word indices.
x_train[23]

array([ 6379,  4226,     7,  8749,   844,    10,    10,   279, 17268,
       16807,   494,   178,     8,   870,    11,     4,  1716,    18,
          17,   196,    17,     9,     2,   614,     2,     2,     2,
        3821, 18242,  3821], dtype=int32)

In [None]:
## RNN + EMBEDDING LAYER ARCHITECTURE
#
# Embedding -> SimpleRNN -> Dense(sigmoid): a binary sentiment classifier.

rnn_hidden_dim = 5        # number of units in the recurrent hidden state
word_embedding_dim = 50   # length of the learned vector for each word

model_rnn = Sequential()

# Maps every integer word index (0 .. max_features - 1) to a trainable vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# IRNN-style initialisation: small random input weights, identity recurrent
# weights and a ReLU activation.
# No `input_shape` is passed here: SimpleRNN is not the first layer, so Keras
# never used that argument to define the model input (newer Keras versions
# warn about it), and (maxlen,) is not a (timesteps, features) RNN input shape.
model_rnn.add(SimpleRNN(rnn_hidden_dim,
                        kernel_initializer=initializers.RandomNormal(stddev=0.001),  # input -> hidden weights
                        recurrent_initializer=initializers.Identity(gain=1.0),       # hidden -> hidden weights
                        activation='relu'))

# One sigmoid unit: the predicted probability of the positive class.
model_rnn.add(Dense(1, activation='sigmoid'))


In [None]:
# Print the layer-by-layer overview of the model (output shapes and parameter counts).
model_rnn.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, None, 50)          1000000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 280       
                                                                 
 dense (Dense)               (None, 1)                 6         
                                                                 
Total params: 1,000,286
Trainable params: 1,000,286
Non-trainable params: 0
_________________________________________________________________


In [None]:
# RMSprop with a small learning rate.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked as a metric.
model_rnn.compile(
    optimizer=rmsprop,
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs. The test split is passed as validation data, so the
# per-epoch val_* numbers are test-set metrics.
# NOTE(review): if these numbers drive hyperparameter choices, consider holding
# out part of the training data instead so the test set stays untouched.
model_rnn.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=batch_size,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x79660ebfea40>

In [None]:
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
score, acc = model_rnn.evaluate(x=x_test, y=y_test, batch_size=batch_size)

print("Test Score :", score)   # loss on the test split
print("Test Accuracy", acc)

Test Score : 0.6931464672088623
Test Accuracy 0.5


In [None]:
# Tuning the hyperparameters for better performance: same vocabulary size,
# but each review now keeps 80 tokens.

max_features = 20000  # vocabulary size: only the `max_features` most frequent words are kept
maxlen = 80           # number of tokens kept per review after padding/truncation

# Reload the raw variable-length reviews (the previous x_train/x_test were
# overwritten by their padded versions), then pad/truncate to the new length.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=max_features,
)

x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_test)
)

In [None]:
# Rebuild the Embedding -> SimpleRNN -> Dense(sigmoid) classifier from scratch
# so training starts from freshly initialised weights.
rnn_hidden_dim = 5        # number of units in the recurrent hidden state
word_embedding_dim = 50   # length of the learned vector for each word

model_rnn = Sequential()

# Maps every integer word index (0 .. max_features - 1) to a trainable vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# Small random input weights, identity recurrent weights, ReLU activation.
# `input_shape` is intentionally not passed: SimpleRNN is not the first layer,
# so Keras ignored it (and newer Keras versions warn about the argument).
model_rnn.add(SimpleRNN(rnn_hidden_dim,
                        kernel_initializer=initializers.RandomNormal(stddev=0.001),  # input -> hidden weights
                        recurrent_initializer=initializers.Identity(gain=1.0),       # hidden -> hidden weights
                        activation='relu'))

# One sigmoid unit: the predicted probability of the positive class.
model_rnn.add(Dense(1, activation='sigmoid'))


In [None]:
# A fresh RMSprop instance for the rebuilt model.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)

# Binary cross-entropy loss for the sigmoid output; accuracy as the reported metric.
model_rnn.compile(
    optimizer=rmsprop,
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs on the 80-token sequences; the test split is passed as
# validation data, so the per-epoch val_* numbers are test-set metrics.
model_rnn.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=batch_size,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x796606731bd0>

In [None]:
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
score, acc = model_rnn.evaluate(x=x_test, y=y_test, batch_size=batch_size)

print("Test Score :", score)   # loss on the test split
print("Test Accuracy", acc)

Test Score : 0.40818390250205994
Test Accuracy 0.812720000743866


In [None]:
# Next experiment: shrink the vocabulary to the 5000 most frequent words and
# keep 80 tokens per review.
max_features = 5000  # vocabulary size: only the `max_features` most frequent words are kept
maxlen = 80          # number of tokens kept per review after padding/truncation

# Reload the raw reviews with the smaller vocabulary, then pad/truncate them.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=max_features,
)

x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen)
    for reviews in (x_train, x_test)
)

In [None]:
# Rebuild the classifier with a smaller embedding (20 dimensions) to go with
# the reduced vocabulary; weights start from a fresh initialisation.
rnn_hidden_dim = 5        # number of units in the recurrent hidden state
word_embedding_dim = 20   # length of the learned vector for each word

model_rnn = Sequential()

# Maps every integer word index (0 .. max_features - 1) to a trainable vector.
model_rnn.add(Embedding(max_features, word_embedding_dim))

# Small random input weights, identity recurrent weights, ReLU activation.
# `input_shape` is intentionally not passed: SimpleRNN is not the first layer,
# so Keras ignored it (and newer Keras versions warn about the argument).
model_rnn.add(SimpleRNN(rnn_hidden_dim,
                        kernel_initializer=initializers.RandomNormal(stddev=0.001),  # input -> hidden weights
                        recurrent_initializer=initializers.Identity(gain=1.0),       # hidden -> hidden weights
                        activation='relu'))

# One sigmoid unit: the predicted probability of the positive class.
model_rnn.add(Dense(1, activation='sigmoid'))


In [None]:
# A fresh RMSprop instance for the rebuilt model.
rmsprop = keras.optimizers.RMSprop(learning_rate=1e-4)

# Binary cross-entropy loss for the sigmoid output; accuracy as the reported metric.
model_rnn.compile(
    optimizer=rmsprop,
    loss="binary_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs with the reduced vocabulary; the test split is passed as
# validation data, so the per-epoch val_* numbers are test-set metrics.
model_rnn.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=batch_size,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x796606b432e0>

In [None]:
# evaluate() returns the loss followed by the compiled metrics (here: accuracy).
score, acc = model_rnn.evaluate(x=x_test, y=y_test, batch_size=batch_size)

print("Test Score :", score)   # loss on the test split
print("Test Accuracy", acc)

Test Score : 0.37334394454956055
Test Accuracy 0.8339599967002869


In [None]:
# Keep training the same model for 10 more epochs to see how extra iterations
# affect performance. The model is not rebuilt, so this continues from the
# weights reached by the previous fit() call.
model_rnn.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_test, y_test),
    epochs=10,
    batch_size=batch_size,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x79660de96320>

In [None]:
# Re-evaluate after the additional epochs; evaluate() returns the loss
# followed by the compiled metrics (here: accuracy).
score, acc = model_rnn.evaluate(x=x_test, y=y_test, batch_size=batch_size)

print("Test Score :", score)   # loss on the test split
print("Test Accuracy", acc)

Test Score : 0.3563743829727173
Test Accuracy 0.8452399969100952
