In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb

# Load and preprocess IMDB dataset
vocab_size = 10000  # Use top 10,000 words
max_len = 200  # Max length of reviews

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

# Build the RNN model with LSTM
model = Sequential([
    Embedding(vocab_size, 128, input_length=max_len),
    LSTM(64, dropout=0.5, recurrent_dropout=0.5),
    Dense(1, activation='sigmoid')  # Binary classification (positive/negative)
])

# Compile and train the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc}")


Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 357ms/step - accuracy: 0.6697 - loss: 0.5951 - val_accuracy: 0.8169 - val_loss: 0.4134
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 350ms/step - accuracy: 0.8294 - loss: 0.3991 - val_accuracy: 0.8362 - val_loss: 0.3820
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 382ms/step - accuracy: 0.8462 - loss: 0.3566 - val_accuracy: 0.8206 - val_loss: 0.4118
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 350ms/step - accuracy: 0.8602 - loss: 0.3349 - val_accuracy: 0.8290 - val_loss: 0.4002
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 352ms/step - accuracy: 0.8700 - loss: 0.3076 - val_accuracy: 0.8202 - val_loss: 0.4198
782/782 - 27s - 34ms/step - accuracy: 0.8202 - loss: 0.4198
Test accuracy: 0.8201599717140198
