In [1]:

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional

In [2]:
max_features = 10000  # Number of unique words to consider
max_len = 200  # Maximum sequence length

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

In [4]:
x_train.shape

(25000, 200)

In [5]:
model = Sequential([
    Embedding(input_dim=max_features, output_dim=128, input_length=max_len),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    LSTM(32),
    Dense(1, activation='sigmoid')
])



In [6]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()


In [7]:
batch_size = 64
epochs = 20

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_split=0.2)

Epoch 1/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 31ms/step - accuracy: 0.7161 - loss: 0.5182 - val_accuracy: 0.8502 - val_loss: 0.3518
Epoch 2/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 26ms/step - accuracy: 0.9126 - loss: 0.2305 - val_accuracy: 0.8672 - val_loss: 0.3733
Epoch 3/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 29ms/step - accuracy: 0.9399 - loss: 0.1607 - val_accuracy: 0.8646 - val_loss: 0.3614
Epoch 4/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.9677 - loss: 0.0966 - val_accuracy: 0.8538 - val_loss: 0.4194
Epoch 5/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.9773 - loss: 0.0737 - val_accuracy: 0.8472 - val_loss: 0.4915
Epoch 6/20
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 27ms/step - accuracy: 0.9737 - loss: 0.0750 - val_accuracy: 0.8580 - val_loss: 0.5881
Epoch 7/20
[1m313/

In [8]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"\nTest accuracy: {test_acc:.2f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 10ms/step - accuracy: 0.8309 - loss: 0.9005

Test accuracy: 0.83


In [9]:
sample_texts = ["The movie was fantastic! I loved it.",
                "It was the worst film I've ever seen."]


In [10]:
word_index = imdb.get_word_index()
reverse_word_index = {v + 3: k for k, v in word_index.items()}
reverse_word_index[0] = "<PAD>"
reverse_word_index[1] = "<START>"
reverse_word_index[2] = "<UNK>"

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [11]:
def encode_texts(texts):
    sequences = []
    for text in texts:
        words = text.lower().split()
        sequence = [word_index.get(word, 2) for word in words]  # Use 2 for <UNK>
        sequences.append(sequence)
    return pad_sequences(sequences, maxlen=max_len)

encoded_samples = encode_texts(sample_texts)

In [12]:
predictions = model.predict(encoded_samples)
for i, text in enumerate(sample_texts):
    print(f"Text: {text}")
    print(f"Predicted Sentiment: {'Positive' if predictions[i] > 0.5 else 'Negative'}\n")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 214ms/step
Text: The movie was fantastic! I loved it.
Predicted Sentiment: Negative

Text: It was the worst film I've ever seen.
Predicted Sentiment: Negative

