In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np



In [2]:
# Fetch the IMDB review dataset, keeping only the `num_words` most frequent words
num_words = 10_000
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=num_words,
)

In [3]:
# Bring every review to the same length (`maxlen` tokens) so they can be batched
maxlen = 200
x_train, x_test = (
    pad_sequences(split, maxlen=maxlen) for split in (x_train, x_test)
)


In [4]:
# Build LSTM model
# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that a "Restart & Run All" starts from the same initial state each time.
tf.keras.utils.set_random_seed(42)

model = models.Sequential([
    # Explicit input spec; replaces Embedding's deprecated `input_length` argument.
    layers.Input(shape=(maxlen,)),
    # Map each of the `num_words` word indices to a 128-dimensional vector.
    layers.Embedding(input_dim=num_words, output_dim=128),
    layers.LSTM(64),
    layers.Dense(1, activation='sigmoid'),  # binary sentiment
])




In [5]:

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [6]:
# Fit for 5 epochs in batches of 64, holding out 20% of the training data for validation
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 412ms/step - accuracy: 0.6946 - loss: 0.5665 - val_accuracy: 0.8398 - val_loss: 0.3732
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 382ms/step - accuracy: 0.8954 - loss: 0.2745 - val_accuracy: 0.8180 - val_loss: 0.4022
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 391ms/step - accuracy: 0.9151 - loss: 0.2195 - val_accuracy: 0.8664 - val_loss: 0.3313
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 389ms/step - accuracy: 0.9521 - loss: 0.1310 - val_accuracy: 0.8682 - val_loss: 0.3897
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 358ms/step - accuracy: 0.9638 - loss: 0.1047 - val_accuracy: 0.8538 - val_loss: 0.4183


In [7]:
# Score the trained model on the test split and report its accuracy
test_loss, test_acc = model.evaluate(
    x=x_test,
    y=y_test,
)
print("Test accuracy:", test_acc)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 115ms/step - accuracy: 0.8495 - loss: 0.4411
Test accuracy: 0.8511599898338318


In [8]:
# Function to predict sentiment of new texts
def predict_sentiment(text_sequences, threshold=0.5):
    """Predict binary sentiment labels for already-encoded reviews.

    Uses the module-level ``model`` and ``maxlen``.

    Args:
        text_sequences: Sized collection of integer word-index sequences,
            preprocessed the same way as the training data.
        threshold: Cut-off applied to the model's sigmoid output; scores
            greater than or equal to it are labelled 1, all others 0.

    Returns:
        A list of ints (1 or 0), one per input sequence, in the same 0/1
        encoding as the training labels. Empty input yields an empty list.
    """
    # An empty batch cannot be fed to model.predict; answer it directly.
    if len(text_sequences) == 0:
        return []

    padded = pad_sequences(text_sequences, maxlen=maxlen)
    # The model has a single output unit, so flatten (n, 1) scores to (n,).
    scores = np.asarray(model.predict(padded)).reshape(-1)
    return (scores >= threshold).astype(int).tolist()

In [9]:
# Demo: classify the first five test reviews and print them next to the true labels
sample = x_test[:5]
preds = predict_sentiment(sample)
print("True labels:", y_test[:len(sample)])
print("Predicted:", preds)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
True labels: [0 1 1 0 1]
Predicted: [0, 1, 1, 1, 1]
