In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Embedding,Dense,SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import sequence

In [None]:
# Load the IMDB movie-review dataset.
# Every review comes back already encoded as a sequence of integer word
# indices; `num_words` limits the vocabulary to the `max_features` most
# frequent words.
max_features = 10000  # Number of unique words to consider
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Training samples: 25000, Test samples: 25000


In [5]:
# Sanity check: report how many reviews each split holds, then display the
# shape of the training array as the cell's value.
print(f"Training samples: {len(x_train)}, Test samples: {len(x_test)}")
np.shape(x_train)

Training samples: 25000, Test samples: 25000


(25000,)

In [9]:
# Keep the first training review and its label for the decoding cells that
# follow, and print both to show the raw integer encoding.
sample_review, sample_label = x_train[0], y_train[0]
print(sample_review, sample_label)

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32] 1


In [None]:
# Decode the integer-encoded sample review back into words.
word_index = imdb.get_word_index()  # word -> integer index
print(len(word_index))

# Invert the mapping so a word can be looked up from its index.
reversed_word_index = {index: word for word, index in word_index.items()}

# Indices in the loaded reviews are offset by 3 relative to `word_index`,
# because 0, 1 and 2 are reserved (padding, start of sequence, unknown).
# Any index that has no entry after removing the offset is shown as '?'.
decode_review = ' '.join(reversed_word_index.get(i - 3, '?') for i in sample_review)
print(decode_review)


88584


In [33]:



# Print every token id of the sample review next to the word it decodes to.
# The id is shifted back by 3 before the lookup; ids with no entry show as '?'.
for token_id in sample_review:
    decoded_word = reversed_word_index.get(token_id - 3, '?')
    print(f"{token_id} : {decoded_word}", end=", ")

1 : ?, 14 : this, 22 : film, 16 : was, 43 : just, 530 : brilliant, 973 : casting, 1622 : location, 1385 : scenery, 65 : story, 458 : direction, 4468 : everyone's, 66 : really, 3941 : suited, 4 : the, 173 : part, 36 : they, 256 : played, 5 : and, 25 : you, 100 : could, 43 : just, 838 : imagine, 112 : being, 50 : there, 670 : robert, 2 : ?, 9 : is, 35 : an, 480 : amazing, 284 : actor, 5 : and, 150 : now, 4 : the, 172 : same, 112 : being, 167 : director, 2 : ?, 336 : father, 385 : came, 39 : from, 4 : the, 172 : same, 4536 : scottish, 1111 : island, 17 : as, 546 : myself, 38 : so, 13 : i, 447 : loved, 4 : the, 192 : fact, 50 : there, 16 : was, 6 : a, 147 : real, 2025 : connection, 19 : with, 14 : this, 22 : film, 4 : the, 1920 : witty, 4613 : remarks, 469 : throughout, 4 : the, 22 : film, 71 : were, 87 : great, 12 : it, 16 : was, 43 : just, 530 : brilliant, 38 : so, 76 : much, 15 : that, 13 : i, 1247 : bought, 4 : the, 22 : film, 17 : as, 515 : soon, 17 : as, 12 : it, 16 : was, 626 : rele

In [43]:
# Bring every review to the same length so the model receives uniform input.
maxlen = 400  # Maximum length of each review
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)

In [44]:
# Inspect the first training review after padding.
first_padded_review = x_train[0]
first_padded_review

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

In [45]:
# Define the model architecture (the actual training happens in a later cell).

# Seed Python, NumPy and TensorFlow so weight initialisation and data
# shuffling are as repeatable as the backend allows.
tf.keras.utils.set_random_seed(42)

model = Sequential()
# Map each of the `max_features` word indices to a 64-dimensional vector;
# inputs are sequences of `maxlen` indices.
model.add(Embedding(max_features, 64, input_length=maxlen))
# NOTE(review): 'relu' replaces SimpleRNN's default 'tanh' activation here;
# confirm this is intentional, since an unbounded activation over 400
# timesteps may be numerically unstable.
model.add(SimpleRNN(64,activation='relu'))

# Single sigmoid unit producing the binary sentiment score.
model.add(Dense(1, activation='sigmoid'))

In [48]:
# Print the layer-by-layer summary, then configure the model for training
# on a binary target.
model.summary()
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 400, 64)           640000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 64)                8256      
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 648321 (2.47 MB)
Trainable params: 648321 (2.47 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [49]:
# Stop training once validation loss has gone 5 epochs without improving,
# and ask Keras to restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [50]:
# Train the model, holding out 20% of the training data for validation.
# The returned History object is kept so the per-epoch metrics can be
# inspected afterwards (history.history) instead of being discarded and
# echoed as an opaque repr.
# Early stopping may end the run before all 10 epochs have been used.
history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping]
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


<keras.src.callbacks.History at 0x1477bf6d0>

In [51]:
# Persist the trained model to disk.
# NOTE(review): the truncated warning in this cell's output looks like Keras'
# legacy-HDF5 notice; consider the native `.keras` format if nothing else
# depends on this filename.
model_path = 'imdb_rnn_model.h5'
model.save(model_path)

  saving_api.save_model(
