Using the IMDB (Internet Movie Database) Dataset

Build a 2-class LSTM sentiment model (binary classification: 0 = negative, 1 = positive) as a first step toward creating a three-class sentiment model (Negative / Neutral / Positive).

In [3]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import pickle

In [38]:
# Step -1: Import Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
import numpy as np


In [39]:
# Step -2: Load the IMDB Dataset
# The vocabulary is restricted to the 10,000 most frequent words (num_words=10000).
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(
    num_words=10000,
)

# Sanity check: size of each split and the set of label values present.
print("Training samples:", len(x_train))
print("Test samples:", len(x_test))
print("Unique labels:", np.unique(y_train))  # expected output: [0 1]

Training samples: 25000
Test samples: 25000
Unique labels: [0 1]


In [40]:
# Step -3: Pad sequences to have equal length
maxlen = 200  # target length (in token ids) passed to pad_sequences for every review

# Apply the same padding call to both splits; the generator is fully consumed by the
# tuple unpacking, so both names are rebound together.
x_train, x_test = (
    keras.preprocessing.sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

In [41]:
# Step -4: One-hot encode labels for 2 classes
# The integer labels are expanded to 2-column one-hot rows, matching the 2-unit
# softmax output and the categorical cross-entropy loss used by the model.
y_train_cat, y_test_cat = (
    to_categorical(labels, num_classes=2) for labels in (y_train, y_test)
)

In [42]:
# Step -5: Build the Model
# Architecture: Embedding (10,000-word vocabulary -> 128-d vectors) -> LSTM(128)
# -> Dense softmax over the 2 sentiment classes.
model = keras.Sequential()
# The input shape is declared with an explicit Input layer instead of Embedding's
# `input_length` argument: Keras 3 deprecates `input_length` (it only triggers a
# warning there), while an Input layer is accepted by both Keras 2 and Keras 3.
model.add(keras.Input(shape=(maxlen,), dtype="int32"))
model.add(layers.Embedding(input_dim=10000, output_dim=128))
model.add(layers.LSTM(128))
model.add(layers.Dense(2, activation='softmax'))   # ✅ 2 classes (positive & negative)

In [43]:
# Step -6: Compile
# Categorical cross-entropy is paired with the one-hot encoded labels; accuracy is
# the only extra metric tracked during training and evaluation.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [45]:
# Step -7: Train the Model
# Only the first 5,000 training reviews are used; validation_split=0.2 asks Keras
# to hold out 20% of that subset for validation.
history = model.fit(
    x=x_train[:5000],
    y=y_train_cat[:5000],
    batch_size=64,
    epochs=2,
    validation_split=0.2,
    verbose=1,
)


Epoch 1/2
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 534ms/step - accuracy: 0.6296 - loss: 0.6452 - val_accuracy: 0.7720 - val_loss: 0.4892
Epoch 2/2
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 533ms/step - accuracy: 0.8810 - loss: 0.3133 - val_accuracy: 0.8330 - val_loss: 0.4166


In [46]:
# NOTE(review): this repeats the training call of the previous cell. `model` is not
# rebuilt in between, so this trains the same model for two further epochs, and
# `history` is rebound to the metrics of this second run only.
history = model.fit(
    x=x_train[:5000],
    y=y_train_cat[:5000],
    batch_size=64,
    epochs=2,
    validation_split=0.2,
    verbose=1,
)


Epoch 1/2
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 567ms/step - accuracy: 0.9579 - loss: 0.1279 - val_accuracy: 0.8080 - val_loss: 0.5303
Epoch 2/2
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 518ms/step - accuracy: 0.9756 - loss: 0.0774 - val_accuracy: 0.8120 - val_loss: 0.6896


In [47]:
# Step -8: Evaluate
# Score the model on the full padded test split with its one-hot labels; the result
# unpacks into the loss followed by the single compiled metric (accuracy).
loss, acc = model.evaluate(x=x_test, y=y_test_cat)
print(f"Test Accuracy: {acc:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 108ms/step - accuracy: 0.8180 - loss: 0.6485
Test Accuracy: 0.8158


In [48]:
# Step -9: Prediction Function
# word -> rank mapping shipped with the dataset.
word_index = keras.datasets.imdb.get_word_index()

# Reverse lookup (id -> word). Every rank is shifted by 3 so that ids 0-3 stay free
# for the special tokens registered just below.
index_word = {rank + 3: token for token, rank in word_index.items()}
index_word.update({
    0: "<PAD>",
    1: "<START>",
    2: "<UNK>",
    3: "<UNUSED>",
})

def encode_review(text, num_words=10000):
    """Convert a raw review string into a padded sequence of IMDB word ids.

    The text is lower-cased and every character other than a letter, digit or
    apostrophe is treated as a separator, so tokens such as "movie," or
    "amazing!" are looked up as "movie" / "amazing" instead of being encoded as
    unknown words.

    Args:
        text: Raw review text.
        num_words: Vocabulary size the model was trained with. Ids are shifted
            by 3 to leave room for the special tokens; any word whose shifted
            id is >= ``num_words`` is encoded as the unknown token (2), so no
            id can fall outside the Embedding layer's ``input_dim``. Defaults
            to 10000, the value used when the dataset was loaded.

    Returns:
        The result of ``pad_sequences`` for a single sequence, padded or
        truncated to the global ``maxlen``.
    """
    # Replace punctuation with spaces; apostrophes are kept so that contractions
    # remain single tokens.
    cleaned = "".join(
        ch if ch.isalnum() or ch == "'" else " " for ch in text.lower()
    )

    encoded = [1]  # start token
    for w in cleaned.split():
        rank = word_index.get(w)
        if rank is None:
            # Retry without surrounding quote marks, e.g. 'great' -> great.
            rank = word_index.get(w.strip("'"))
        if rank is not None and rank + 3 < num_words:
            encoded.append(rank + 3)
        else:
            encoded.append(2)  # unknown or out-of-vocabulary token
    return keras.preprocessing.sequence.pad_sequences([encoded], maxlen=maxlen)

def predict_sentiment(text):
    """Print the model's sentiment verdict for one raw review string.

    The review is encoded with ``encode_review`` and scored by the global
    ``model``; the index of the highest score is taken as the class. Class 1 is
    reported as Positive, anything else as Negative. Nothing is returned.
    """
    scores = model.predict(encode_review(text))
    predicted_class = np.argmax(scores)
    message = (
        f"Review: {text}\nPrediction: Positive ✅"
        if predicted_class == 1
        else f"Review: {text}\nPrediction: Negative ❌"
    )
    print(message)

In [49]:
# Step -10: Test with Custom Reviews
# One clearly positive, one clearly negative and one mixed review, scored in order.
for sample_review in (
    "I really loved this movie, it was amazing!",
    "This movie was terrible and boring.",
    "The film was okay, not the best but not the worst either.",
):
    predict_sentiment(sample_review)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 209ms/step
Review: I really loved this movie, it was amazing!
Prediction: Positive ✅
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Review: This movie was terrible and boring.
Prediction: Negative ❌
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Review: The film was okay, not the best but not the worst either.
Prediction: Negative ❌
