In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [2]:
### Load the imdb dataset
max_features = 1000 #vocab_size extracting 1000 most occuring features
(X_train, Y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

print(X_train.shape)
print(Y_train.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
(25000,)
(25000,)


In [3]:
## Inspect sample review and its lebel
print(X_train[0])##One Hot Representation
print(Y_train[0])

[1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]
1


In [4]:
sample_review = X_train[0]
sample_label = Y_train[0]
print(f"Review: {sample_review}")
print(f"Label: {sample_label}")

Review: [1, 14, 22, 16, 43, 530, 973, 2, 2, 65, 458, 2, 66, 2, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 2, 2, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2, 19, 14, 22, 4, 2, 2, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 2, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2, 2, 16, 480, 66, 2, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 2, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 2, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 2, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 2, 88, 12, 16, 283, 5, 16, 2, 113, 103, 32, 15, 16, 2, 19, 178, 32]
Label: 1


In [5]:
## Mapping of word indices back to words
word_index = imdb.get_word_index()
reverse_word_index = {values:key for key, values in word_index.items()}
# print(reverse_word_index)
decoded_review = ' '.join(reverse_word_index.get(i-3,'?') for i in sample_review)
print(decoded_review)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
? this film was just brilliant casting ? ? story direction ? really ? the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same ? ? as myself so i loved the fact there was a real ? with this film the ? ? throughout the film were great it was just brilliant so much that i ? the film as soon as it was released for ? and would recommend it to everyone to watch and the ? ? was amazing really ? at the end it was so sad and you know what they say if you ? at a film it must have been good and this definitely was also ? to the two little ? that played the ? of ? and paul they were just brilliant children are often left out of the ? ? i think because the stars that play them all ? up are such a big ? for the whole film but

In [6]:
max_len = 500
X_train = pad_sequences(X_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)
print(X_train)

[[  0   0   0 ...  19 178  32]
 [  0   0   0 ...  16 145  95]
 [  0   0   0 ...   7 129 113]
 ...
 [  0   0   0 ...   4   2   2]
 [  0   0   0 ...  12   9  23]
 [  0   0   0 ... 204 131   9]]


## Train Simple RNN

In [7]:
model = Sequential()
model.add(Embedding(input_dim=max_features,output_dim=128, input_length = max_len)) ##Embedding Layer
model.add(SimpleRNN(128, activation='relu'))
model.add(Dense(1, activation="sigmoid"))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



In [8]:
model.summary()

In [15]:
## Create instance of EarlyStopping Callbacvk
from tensorflow.keras.callbacks import EarlyStopping
early_stopping = EarlyStopping(monitor='val_loss', patience=15)

In [16]:
## Train the model with early stopping
model.fit(
    X_train, Y_train, epochs = 20,batch_size=25,
    validation_split=0.2,
    callbacks=[early_stopping]
)

Epoch 1/20
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 41ms/step - accuracy: 0.6662 - loss: 0.5835 - val_accuracy: 0.6408 - val_loss: 0.6124
Epoch 2/20
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 40ms/step - accuracy: 0.6734 - loss: 0.5802 - val_accuracy: 0.6438 - val_loss: 0.6104
Epoch 3/20
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 40ms/step - accuracy: 0.6813 - loss: 0.5761 - val_accuracy: 0.6458 - val_loss: 0.6081
Epoch 4/20
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 40ms/step - accuracy: 0.6812 - loss: 0.5658 - val_accuracy: 0.6494 - val_loss: 0.6056
Epoch 5/20
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 41ms/step - accuracy: 0.6853 - loss: 0.5726 - val_accuracy: 0.6542 - val_loss: 0.6031
Epoch 6/20
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 40ms/step - accuracy: 0.6957 - loss: 0.5531 - val_accuracy: 0.6710 - val_loss: 0.5814
Epoch 7/20
[1m8

<keras.src.callbacks.history.History at 0x7a3e37a13590>

In [17]:
model.summary()

In [18]:
### Save the model file
model.save('simple_rnn_imdb.h5')



In [19]:
model.evaluate(x_test, y_test)





[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - accuracy: 0.7953 - loss: 0.4857


[0.4837231934070587, 0.7942799925804138]

In [14]:
model.metrics_names

['loss', 'compile_metrics']