In [37]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Bidirectional, Dense

In [51]:
# Load IMDb dataset, keep the top 10,000 most frequently occurring words
num_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

# Pad sequences to a fixed length (in this case, 200)
max_review_length = 200
X_train = pad_sequences(X_train, maxlen=max_review_length)
X_test = pad_sequences(X_test, maxlen=max_review_length)

In [52]:
bidirectional_lstm_model = Sequential()
bidirectional_lstm_model.add(Embedding(input_dim=num_words, output_dim=128, input_length=max_review_length))
bidirectional_lstm_model.add(Bidirectional(LSTM(units=64)))
bidirectional_lstm_model.add(Dense(units=1, activation='sigmoid'))

bidirectional_lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [53]:
bidirectional_lstm_model.fit(X_train, y_train, epochs=3, batch_size=64, validation_split=0.2)

# Evaluate the model on the test data
loss, accuracy = bidirectional_lstm_model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

Epoch 1/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 171ms/step - accuracy: 0.7037 - loss: 0.5431 - val_accuracy: 0.8576 - val_loss: 0.3385
Epoch 2/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 150ms/step - accuracy: 0.9016 - loss: 0.2579 - val_accuracy: 0.8696 - val_loss: 0.3313
Epoch 3/3
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 155ms/step - accuracy: 0.9379 - loss: 0.1701 - val_accuracy: 0.8560 - val_loss: 0.3371
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 26ms/step - accuracy: 0.8535 - loss: 0.3444
Test Accuracy: 0.85


In [56]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Function to preprocess a new review text and make predictions
def predict_sentiment(review_text):
    # Convert the review text to a list of word indices using the IMDb word index
    word_index = imdb.get_word_index()
    max_review_length = 200  # Assuming reviews are padded to a length of 200 words during training
    
    # Tokenize and pad the input review text
    review_words = review_text.split()
    review_indices = [word_index.get(word, 0) for word in review_words]
    padded_review_indices = pad_sequences([review_indices], maxlen=max_review_length)
    
    # Make prediction for the review
    prediction = bidirectional_lstm_model.predict(padded_review_indices)
    # Convert the prediction to a binary value (0 or 1) based on a threshold (e.g., 0.5)
    threshold = 0.5
    binary_prediction = 1 if prediction > threshold else 0
    
    # Return the binary prediction and the probability score
    return binary_prediction, prediction[0][0]

# Example usage:
new_review = "This is a bad movie"
binary_prediction, probability_score = predict_sentiment(new_review)

if binary_prediction == 1:
    print("Positive review with probability score:", probability_score)
else:
    print("Negative review with probability score:", 1 - probability_score)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Negative review with probability score: 0.7361432015895844
