In [1]:
# 1. Load the IMDB Dataset
import tensorflow as tf
from tensorflow.keras.datasets import imdb

# Load IMDB dataset (Keeps only top 10,000 frequent words)
num_words = 10000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=num_words)

print(f"Training samples: {len(train_data)}")
print(f"Testing samples: {len(test_data)}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Training samples: 25000
Testing samples: 25000


In [2]:
#2. Preprocess the Data
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Set max length for reviews (shorter reviews will be padded)
max_length = 200

# Pad sequences so they all have the same length
train_data = pad_sequences(train_data, maxlen=max_length, padding="post")
test_data = pad_sequences(test_data, maxlen=max_length, padding="post")

print(f"First review (numerical format): {train_data[0]}")


First review (numerical format): [   5   25  100   43  838  112   50  670    2    9   35  480  284    5
  150    4  172  112  167    2  336  385   39    4  172 4536 1111   17
  546   38   13  447    4  192   50   16    6  147 2025   19   14   22
    4 1920 4613  469    4   22   71   87   12   16   43  530   38   76
   15   13 1247    4   22   17  515   17   12   16  626   18    2    5
   62  386   12    8  316    8  106    5    4 2223 5244   16  480   66
 3785   33    4  130   12   16   38  619    5   25  124   51   36  135
   48   25 1415   33    6   22   12  215   28   77   52    5   14  407
   16   82    2    8    4  107  117 5952   15  256    4    2    7 3766
    5  723   36   71   43  530  476   26  400  317   46    7    4    2
 1029   13  104   88    4  381   15  297   98   32 2071   56   26  141
    6  194 7486   18    4  226   22   21  134  476   26  480    5  144
   30 5535   18   51   36   28  224   92   25  104    4  226   65   16
   38 1334   88   12   16  283    5   16 447

In [3]:
#3. Build a Simple LSTM Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Build the LSTM model
model = Sequential([
    Embedding(input_dim=num_words, output_dim=64, input_length=max_length),  # Word embeddings
    LSTM(64, return_sequences=False),  # LSTM layer to process sequential text
    Dense(10, activation="relu"),  # Intermediate layer
    Dense(1, activation="sigmoid")  # Final layer (sigmoid for binary classification)
])

# Compile the model
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Display model summary
model.summary()




In [4]:
#4. Train the Model
# Train the model
model.fit(train_data, train_labels, epochs=3, batch_size=64, validation_data=(test_data, test_labels))


Epoch 1/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 206ms/step - accuracy: 0.5551 - loss: 0.6728 - val_accuracy: 0.6217 - val_loss: 0.5953
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 202ms/step - accuracy: 0.6494 - loss: 0.5635 - val_accuracy: 0.5630 - val_loss: 0.7913
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 203ms/step - accuracy: 0.6956 - loss: 0.5391 - val_accuracy: 0.5448 - val_loss: 0.6777


<keras.src.callbacks.history.History at 0x7e6c948f4850>

In [5]:
#5. Test the Model on New Reviews
import numpy as np

# Example reviews (manually created)
new_reviews = [
    "I absolutely loved this movie. The story was amazing!",
    "This was the worst film I have ever seen. A complete disaster!"
]

# Convert reviews into numerical sequences
new_sequences = imdb.get_word_index()
encoded_reviews = [[new_sequences.get(word, 2) for word in review.lower().split()] for review in new_reviews]

# Pad the new sequences
new_padded = pad_sequences(encoded_reviews, maxlen=max_length, padding="post")

# Predict sentiment
predictions = model.predict(new_padded)

# Display results
for i, review in enumerate(new_reviews):
    sentiment = "Positive" if predictions[i] > 0.5 else "Negative"
    print(f"\nReview: {review}")
    print(f"Predicted Sentiment: {sentiment}")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367ms/step

Review: I absolutely loved this movie. The story was amazing!
Predicted Sentiment: Positive

Review: This was the worst film I have ever seen. A complete disaster!
Predicted Sentiment: Positive


In [None]:
#Summary
#✅ The IMDB dataset is used for sentiment analysis in NLP.
#✅ We trained an LSTM model to classify movie reviews as positive or negative.
#✅ The model learned word relationships and can predict sentiment for new reviews.