In [1]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import set_random_seed


In [2]:
# Restrict the vocabulary to the 10,000 most frequent words
num_words = 10000
train_split, test_split = imdb.load_data(num_words=num_words)
X_train, y_train = train_split
X_test, y_test = test_split

# Pad the reviews so that every sequence has the same length (max_len)
max_len = 200
X_train, X_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (X_train, X_test)
)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Seed the Python, NumPy and backend RNGs before any weights are created so that
# weight initialisation and batch shuffling repeat across "Restart & Run All"
# (as far as the backend's ops are deterministic).
seed = 42
set_random_seed(seed)

# Binary sentiment classifier: word embeddings -> LSTM -> single sigmoid unit.
# `input_length` is deliberately not passed to Embedding: Keras 3 deprecates that
# argument. The sequence length is taken from the padded batches the first time
# the model is called (e.g. in fit).
model = Sequential()
model.add(Embedding(input_dim=num_words, output_dim=128))  # word embeddings
model.add(LSTM(64))  # LSTM layer
model.add(Dense(1, activation='sigmoid'))  # binary output

# Sigmoid output + binary cross-entropy for the two-class labels; track accuracy.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])




In [4]:
# Keep the History object returned by fit(): its `.history` dict holds the
# per-epoch loss/accuracy values (training and validation) needed to inspect or
# plot learning curves. Assigning it also stops the cell from echoing the bare
# object repr as its output.
# validation_split=0.2 holds out 20% of the training data for validation.
history = model.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.2)


Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 147ms/step - accuracy: 0.7122 - loss: 0.5271 - val_accuracy: 0.8538 - val_loss: 0.3503
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 141ms/step - accuracy: 0.8999 - loss: 0.2607 - val_accuracy: 0.8668 - val_loss: 0.3182
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 148ms/step - accuracy: 0.9447 - loss: 0.1586 - val_accuracy: 0.8464 - val_loss: 0.3396


<keras.src.callbacks.history.History at 0x7997b2f8e110>

In [5]:
# Score the trained model on the held-out test split. evaluate() yields the loss
# followed by the metrics requested in compile() (here: accuracy).
loss, acc = model.evaluate(
    x=X_test,
    y=y_test,
)
print(f"Test Accuracy: {acc:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 20ms/step - accuracy: 0.8458 - loss: 0.3564
Test Accuracy: 0.8488
