In [1]:
import tensorflow_datasets as tfds

# Load the training split and convert to numpy for easier handling
train_data = tfds.as_numpy(tfds.load('imdb_reviews', split='train', batch_size=-1))
test_data = tfds.as_numpy(tfds.load('imdb_reviews', split='test', batch_size=-1))

# Extract text and labels
train_sentences = [str(x) for x in train_data['text']]
train_labels = train_data['label']

test_sentences = [str(x) for x in test_data['text']]
test_labels = test_data['label']




Downloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.C9MDAF_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.C9MDAF_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...: 0 examples [00:00, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.C9MDAF_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [2]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Define the vocabulary size and sequence length
vocab_size = 10000
max_length = 100
oov_token = "<OOV>"

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_token)
tokenizer.fit_on_texts(train_sentences)

train_sequences = tokenizer.texts_to_sequences(train_sentences)
train_padded = pad_sequences(train_sequences, maxlen=max_length, truncating='post')

test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded = pad_sequences(test_sequences, maxlen=max_length, truncating='post')


In [3]:
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 16, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()




In [4]:
history = model.fit(
    train_padded, train_labels,
    epochs=3,
    validation_data=(test_padded, test_labels)
)


Epoch 1/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 78ms/step - accuracy: 0.6619 - loss: 0.5797 - val_accuracy: 0.7938 - val_loss: 0.4446
Epoch 2/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 75ms/step - accuracy: 0.8679 - loss: 0.3236 - val_accuracy: 0.8178 - val_loss: 0.4041
Epoch 3/3
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 89ms/step - accuracy: 0.9066 - loss: 0.2391 - val_accuracy: 0.7915 - val_loss: 0.4536


In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred_probs = model.predict(test_padded)
y_pred = (y_pred_probs > 0.5).astype("int32")

accuracy  = accuracy_score(test_labels, y_pred)
precision = precision_score(test_labels, y_pred)
recall    = recall_score(test_labels, y_pred)
f1        = f1_score(test_labels, y_pred)

print(f"Accuracy : {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall   : {recall * 100:.2f}%")
print(f"F1-score : {f1 * 100:.2f}%")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 14ms/step
Accuracy : 79.15%
Precision: 85.70%
Recall   : 69.98%
F1-score : 77.05%


In [7]:
loss, test_acc = model.evaluate(test_padded, test_labels, verbose=0)
print(f"Test Accuracy: {test_acc * 100:.2f}%")


Test Accuracy: 79.15%


In [5]:
import numpy as np

def predict_sentiment(text):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_length, truncating='post')
    prediction = model.predict(padded)[0][0]
    sentiment = "😊 Positive" if prediction > 0.5 else "😞 Negative"
    print(f"Review: {text}")
    print(f"Prediction: {sentiment} (score: {prediction:.3f})")

predict_sentiment("This movie was absolutely fantastic! The plot was brilliant.")
predict_sentiment("I hated this movie. It was boring and too long.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 364ms/step
Review: This movie was absolutely fantastic! The plot was brilliant.
Prediction: 😊 Positive (score: 0.694)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
Review: I hated this movie. It was boring and too long.
Prediction: 😞 Negative (score: 0.039)


In [None]:
while True:
    user_input = input("Enter a movie review (or 'quit' to exit): ")
    if user_input.lower() == 'quit':
        break
    predict_sentiment(user_input)


Enter a movie review (or 'quit' to exit): the batman, one of the masterpiece i saw in recent past. since covid, it has bring the urge to go back to cinema and watch it 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Review: the batman, one of the masterpiece i saw in recent past. since covid, it has bring the urge to go back to cinema and watch it 
Prediction: 😊 Positive (score: 0.943)
Enter a movie review (or 'quit' to exit): quit
