In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.datasets import imdb
import numpy as np

In [2]:
# Load IMDB dataset
num_words = 10000  # Keep top 10,000 words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Pad sequences to ensure uniform input size
max_length = 100  # Maximum length of input sequences
x_train = pad_sequences(x_train, maxlen=max_length)
x_test = pad_sequences(x_test, maxlen=max_length)

In [4]:
# Build RNN model
model = Sequential([
    Embedding(num_words, 128, input_length=max_length),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])



In [5]:
# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [6]:
# Train the model
epochs = 5
batch_size = 64
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=epochs, batch_size=batch_size)

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 381ms/step - accuracy: 0.7038 - loss: 0.5515 - val_accuracy: 0.8145 - val_loss: 0.4138
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 335ms/step - accuracy: 0.8615 - loss: 0.3343 - val_accuracy: 0.8493 - val_loss: 0.3499
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 398ms/step - accuracy: 0.8918 - loss: 0.2739 - val_accuracy: 0.8457 - val_loss: 0.3701
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m151s[0m 386ms/step - accuracy: 0.9151 - loss: 0.2229 - val_accuracy: 0.8396 - val_loss: 0.3746
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 332ms/step - accuracy: 0.9325 - loss: 0.1831 - val_accuracy: 0.8414 - val_loss: 0.4022


<keras.src.callbacks.history.History at 0x7f79dfbdad90>

In [7]:
# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Accuracy: {test_acc:.4f}')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 42ms/step - accuracy: 0.8407 - loss: 0.4095
Accuracy: 0.8414


In [8]:
# Predict on custom input
def predict_sentiment(text, tokenizer, model):
    sequence = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(sequence, maxlen=max_length)
    prediction = model.predict(padded)[0][0]
    return 'Positive' if prediction > 0.5 else 'Negative'

In [9]:
# Example usage
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [10]:
def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review])

example_review = decode_review(x_test[0])
print("Review:", example_review)
print("Predicted Sentiment:", predict_sentiment(example_review, Tokenizer(num_words=num_words), model))

Review: ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? please give this one a miss br br ? ? and the rest of the cast rendered terrible performances the show is flat flat flat br br i don't know how michael madison could have allowed this one on his plate he almost seemed to know this wasn't going to work out and his performance was quite ? so all you madison fans give this a miss
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 537ms/step
Predicted Sentiment: Positive
