In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [23]:
# Dataset configuration shared by the cells below.
vocab_size = 10000  # passed to imdb.load_data as num_words (vocabulary cap)
max_len = 200  # every review is padded/truncated to this many tokens

# Fetch the integer-encoded IMDB reviews, then bring both splits to a
# uniform length of max_len so they can be batched.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
x_train, x_test = (
    pad_sequences(reviews, maxlen=max_len) for reviews in (x_train, x_test)
)

In [3]:
# Assemble the sentiment classifier layer by layer:
# token ids -> 128-dim embeddings -> 64-unit LSTM -> single sigmoid output.
model = Sequential()
model.add(Embedding(vocab_size, 128, input_length=max_len))
model.add(LSTM(64, dropout=0.5, recurrent_dropout=0.5))
# One sigmoid unit: binary classification (positive/negative).
model.add(Dense(1, activation='sigmoid'))



In [4]:
# Configure the optimisation objective, then train for five epochs.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# The test split is passed as validation data, so the per-epoch val_* numbers
# are computed on the same samples that the evaluation cell uses.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=5,
    validation_data=(x_test, y_test),
)

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 358ms/step - accuracy: 0.6654 - loss: 0.5996 - val_accuracy: 0.8387 - val_loss: 0.3768
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 364ms/step - accuracy: 0.8240 - loss: 0.4053 - val_accuracy: 0.8254 - val_loss: 0.4131
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 355ms/step - accuracy: 0.8494 - loss: 0.3601 - val_accuracy: 0.8302 - val_loss: 0.3889
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 355ms/step - accuracy: 0.8642 - loss: 0.3248 - val_accuracy: 0.8191 - val_loss: 0.4172
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 359ms/step - accuracy: 0.8553 - loss: 0.3423 - val_accuracy: 0.8256 - val_loss: 0.4079


<keras.src.callbacks.history.History at 0x7893f0dc8c10>

In [5]:
# Score the trained model on the test split and report its accuracy.
test_loss, test_acc = model.evaluate(
    x=x_test,
    y=y_test,
    verbose=2,
)
print(f"Test accuracy: {test_acc}")

782/782 - 27s - 35ms/step - accuracy: 0.8256 - loss: 0.4079
Test accuracy: 0.8256000280380249


In [18]:
def _encode_review(review, start_char=1, oov_char=2, index_from=3):
    """Convert raw review text into the token ids produced by ``imdb.load_data``.

    ``imdb.get_word_index()`` maps each word to its raw frequency rank, whereas
    ``imdb.load_data()`` (with its default ``start_char``, ``oov_char`` and
    ``index_from``) shifts every rank by ``index_from``, prepends
    ``start_char`` and replaces ids outside the vocabulary with ``oov_char``.
    The same transformation is applied here so that inference inputs use the
    ids the model was trained on.

    Args:
        review: Raw review text.
        start_char: Id marking the start of a sequence (load_data default).
        oov_char: Id used for out-of-vocabulary words (load_data default).
        index_from: Offset added to every word rank (load_data default).

    Returns:
        A list of integer token ids, beginning with ``start_char``.
    """
    words = tf.keras.preprocessing.text.text_to_word_sequence(review)
    word_index = imdb.get_word_index()

    token_ids = [start_char]
    for word in words:
        rank = word_index.get(word)
        # Unknown words become the OOV id, not 0 (0 is the padding id).
        token_id = oov_char if rank is None else rank + index_from
        # Ids at or beyond vocab_size were replaced by oov_char in the training
        # data and would fall outside the Embedding table.
        if token_id >= vocab_size:
            token_id = oov_char
        token_ids.append(token_id)
    return token_ids


def predict_sentiment(review):
    """Classify a raw text review as ``'Positive'`` or ``'Negative'``.

    The review is tokenised, encoded with the IMDB training id scheme, padded
    to ``max_len`` and passed through ``model``.

    No "not_<word>" negation rewriting is performed: such tokens are not
    expected to exist in the IMDB word index, so rewriting would only erase the
    word following a negation. The words "not", "no" and "never" are fed to
    the LSTM as ordinary tokens, in order.

    Args:
        review: Raw review text.

    Returns:
        ``'Positive'`` if the predicted probability exceeds 0.5, otherwise
        ``'Negative'``.
    """
    review_padded = pad_sequences([_encode_review(review)], maxlen=max_len)

    # model.predict returns one row per input with a single sigmoid output;
    # pull out the scalar instead of comparing a whole array.
    probability = float(model.predict(review_padded)[0][0])
    return 'Positive' if probability > 0.5 else 'Negative'

In [19]:
# Sanity check with a clearly positive review.
new_review_1 = "This film was a masterpiece! The acting was top-notch."
sentiment_1 = predict_sentiment(new_review_1)
print(f"Sentiment: {sentiment_1}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Sentiment: Positive


In [21]:
# Sanity check with a mixed review that ends on a negative note.
new_review_4 = "It started off well but fell flat in the second half."
sentiment_4 = predict_sentiment(new_review_4)
print(f"Sentiment: {sentiment_4}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Sentiment: Negative
