In [1]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import SGD

# Load IMDb dataset
# We only use the top 5000 most frequent words to keep the vocabulary size manageable
max_features = 5000
(X_train, y_train), (X_valid, y_valid) = imdb.load_data(num_words=max_features)

# Pad sequences to ensure uniform length for input
maxlen = 200  # Maximum length of input sequences
X_train = pad_sequences(X_train, maxlen=maxlen)
X_valid = pad_sequences(X_valid, maxlen=maxlen)

# Build the RNN model
model = Sequential([
    Embedding(input_dim=max_features, output_dim=128, input_length=maxlen),
    SimpleRNN(128, activation='relu'),
    Dense(1, activation='sigmoid')  # Sigmoid for binary classification (positive/negative)
])

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=0.01), metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, batch_size=128, epochs=5, validation_data=(X_valid, y_valid))

# Evaluate the model
score = model.evaluate(X_valid, y_valid, verbose=0)
print(f"Test loss: {score[0]}")
print(f"Test accuracy: {score[1]}")

# Example prediction
sample_review = ["This movie was amazing! The acting was superb and the story was touching."]
# Convert the review to integer tokens (same as how data was preprocessed)
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(sample_review)
sequence = tokenizer.texts_to_sequences(sample_review)
padded_sequence = pad_sequences(sequence, maxlen=maxlen)

# Predict sentiment (1: positive, 0: negative)
prediction = model.predict(padded_sequence)
print(f"Prediction: {'Positive' if prediction[0] > 0.5 else 'Negative'}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step




Epoch 1/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 252ms/step - accuracy: 0.4946 - loss: 0.6935 - val_accuracy: 0.4933 - val_loss: 0.6934
Epoch 2/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 257ms/step - accuracy: 0.5029 - loss: 0.6932 - val_accuracy: 0.5054 - val_loss: 0.6931
Epoch 3/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 252ms/step - accuracy: 0.5179 - loss: 0.6926 - val_accuracy: 0.5124 - val_loss: 0.6928
Epoch 4/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 257ms/step - accuracy: 0.5228 - loss: 0.6924 - val_accuracy: 0.5192 - val_loss: 0.6925
Epoch 5/5
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 243ms/step - accuracy: 0.5373 - loss: 0.6918 - val_accuracy: 0.5234 - val_loss: 0.6923
Test loss: 0.6922677159309387
Test accuracy: 0.523360013961792
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 206ms/step
Prediction: Positive
