In [2]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.sequence import pad_sequences

In [3]:
# 1. Load the IMDB dataset
max_features = 10000  # Use the top 10,000 most frequent words
max_len = 200  # Cut reviews after 200 words
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 2us/step


In [4]:
# 2. Pad sequences to ensure uniform length
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

In [5]:
# 3. Build the LSTM model
model = Sequential([
    Embedding(input_dim=max_features, output_dim=128, input_length=max_len),
    LSTM(units=64, return_sequences=False),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification: Positive or Negative
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()



In [6]:
# 4. Train the model
history = model.fit(
    x_train, y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2
)

Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 134ms/step - accuracy: 0.6872 - loss: 0.5742 - val_accuracy: 0.8506 - val_loss: 0.3687
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 90ms/step - accuracy: 0.8953 - loss: 0.2767 - val_accuracy: 0.8660 - val_loss: 0.3291
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 97ms/step - accuracy: 0.9330 - loss: 0.1874 - val_accuracy: 0.8476 - val_loss: 0.3481
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 71ms/step - accuracy: 0.9524 - loss: 0.1375 - val_accuracy: 0.8582 - val_loss: 0.3791
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 61ms/step - accuracy: 0.9631 - loss: 0.1049 - val_accuracy: 0.8596 - val_loss: 0.4075


In [7]:
# 5. Evaluate the model
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step - accuracy: 0.8530 - loss: 0.4449
Test Loss: 0.4297, Test Accuracy: 0.8547


In [None]:
# 7. Test with a custom review (Optional)
# Decode IMDB review back to text (for understanding custom reviews)
word_index = imdb.get_word_index()
reverse_word_index = {value: key for key, value in word_index.items()}

def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i - 3, '?') for i in encoded_review])
