In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Set vocabulary size
vocab_size = 10000  # Only keep the top 10k words

# Load IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Pad sequences to the same length
maxlen = 500  # We'll cut reviews after 500 words
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

print(f"x_train shape: {x_train.shape}")
print(f"x_test shape: {x_test.shape}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
x_train shape: (25000, 500)
x_test shape: (25000, 500)


In [3]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=maxlen),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()



In [4]:
history = model.fit(x_train, y_train,
                    batch_size=64,
                    epochs=3,
                    validation_split=0.2)

Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m541s[0m 2s/step - accuracy: 0.6764 - loss: 0.5792 - val_accuracy: 0.7892 - val_loss: 0.4724
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m544s[0m 2s/step - accuracy: 0.8399 - loss: 0.3807 - val_accuracy: 0.8406 - val_loss: 0.3769
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m544s[0m 2s/step - accuracy: 0.8616 - loss: 0.3264 - val_accuracy: 0.7930 - val_loss: 0.4521


In [5]:
score, acc = model.evaluate(x_test, y_test, batch_size=64)
print(f"Test Loss: {score:.4f}")
print(f"Test Accuracy: {acc:.4f}")

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 287ms/step - accuracy: 0.7951 - loss: 0.4517
Test Loss: 0.4484
Test Accuracy: 0.7978


In [6]:
# Example: predict the sentiment of the first review in test set
sample_review = x_test[0].reshape(1, -1)
prediction = model.predict(sample_review)[0][0]
print(f"Predicted sentiment score: {prediction:.4f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 873ms/step
Predicted sentiment score: 0.3219
