In [7]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [8]:
# Hyperparameters shared by the loading, padding and model cells
vocab_size = 1000       # vocabulary size, passed to load_data as num_words
max_len = 200           # target review length (longer reviews are truncated, shorter ones padded)
embedding_dim = 128     # dimension of each word-embedding vector

# Load the IMDB dataset as (features, labels) pairs for the train and test splits
(Xtrain, ytrain), (Xtest, ytest) = imdb.load_data(num_words=vocab_size)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


### Word Embedding

In [23]:
# Fetch the word -> index dictionary that accompanies the IMDB dataset
word_index = imdb.get_word_index()

# Build the reverse lookup (index -> word). Every dataset index is shifted by 3,
# which leaves ids 0-3 free for the marker tokens written afterwards.
index_to_word = dict((rank + 3, token) for token, rank in word_index.items())
index_to_word.update({
    0: "<PAD>",
    1: "<START>",
    2: "<UNK>",
    3: "<UNUSED>",
})

# Decode a review
def decode_review(encoded_review, index_map=None):
    """Convert a sequence of integer word ids back into readable text.

    Args:
        encoded_review: Iterable of integer word ids.
        index_map: Optional dict mapping id -> token. When omitted, the
            notebook-level ``index_to_word`` table is looked up at call time,
            so existing ``decode_review(review)`` calls behave as before.

    Returns:
        The tokens joined by single spaces. Ids missing from the mapping
        are rendered as '?'. An empty review yields an empty string.
    """
    if index_map is None:
        index_map = index_to_word
    return ' '.join(index_map.get(i, '?') for i in encoded_review)

# Turn the first training review back into text and display it
first_review_text = decode_review(Xtrain[0])
print("First review (decoded):", first_review_text)

First review (decoded): <START> this film was just brilliant casting <UNK> <UNK> story direction <UNK> really <UNK> the part they played and you could just imagine being there robert <UNK> is an amazing actor and now the same being director <UNK> father came from the same <UNK> <UNK> as myself so i loved the fact there was a real <UNK> with this film the <UNK> <UNK> throughout the film were great it was just brilliant so much that i <UNK> the film as soon as it was released for <UNK> and would recommend it to everyone to watch and the <UNK> <UNK> was amazing really <UNK> at the end it was so sad and you know what they say if you <UNK> at a film it must have been good and this definitely was also <UNK> to the two little <UNK> that played the <UNK> of <UNK> and paul they were just brilliant children are often left out of the <UNK> <UNK> i think because the stars that play them all <UNK> up are such a big <UNK> for the whole film but these children are amazing and should be <UNK> for what

In [24]:
# Pad/truncate both splits to max_len so the RNN receives uniform-length input.
# The padded arrays replace the original variables, as later cells read Xtrain / Xtest.
Xtrain, Xtest = (
    pad_sequences(split, maxlen=max_len) for split in (Xtrain, Xtest)
)

In [32]:
# Define the RNN model using TensorFlow: embedding -> LSTM -> sigmoid output
model = tf.keras.Sequential([
    # Declare the input explicitly (max_len integer word ids per review) instead of
    # passing `input_length` to Embedding, which newer Keras releases deprecate.
    # An explicit Input keeps the model built, so model.summary() can report shapes.
    tf.keras.Input(shape=(max_len,), dtype="int32"),
    # Map each word id (< vocab_size) to an embedding_dim-dimensional vector
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    # Summarize the embedded sequence into a single 64-unit vector
    tf.keras.layers.LSTM(64),
    # Single sigmoid unit for the binary label
    tf.keras.layers.Dense(1, activation='sigmoid'),
])


# Compile the model: binary cross-entropy loss, Adam optimizer, accuracy metric
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the layer-by-layer model summary
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 200, 128)          128000    
                                                                 
 lstm_3 (LSTM)               (None, 64)                49408     
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 177473 (693.25 KB)
Trainable params: 177473 (693.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [33]:
# Train the model for 3 epochs with a batch size of 128.
# NOTE(review): validation_data is the test split (Xtest, ytest), so the
# validation metrics are computed on the same data used for the final evaluation.
model.fit(
    x=Xtrain,
    y=ytrain,
    batch_size=128,
    epochs=3,
    validation_data=(Xtest, ytest),
)


# Evaluate the model on the test split; the result unpacks into the loss
# followed by the accuracy metric configured at compile time.
test_metrics = model.evaluate(x=Xtest, y=ytest)
loss, accuracy = test_metrics
print(loss, accuracy)

Epoch 1/3
Epoch 2/3
Epoch 3/3
0.3429221510887146 0.8503199815750122
