In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
max_words = 10000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1us/step


In [4]:
max_len = 150
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

In [7]:
def sequence_to_text(sequence):
    word_index = imdb.get_word_index()
    reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
    decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in sequence])
    return decoded_review

In [9]:
for i in range(3):
    print(f"Example {i+1}:\n{sequence_to_text(X_train[i])}\n")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3us/step
Example 1:
it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all

Example 2:
the worst ever made the plot is paper thin and ridiculous the

In [10]:
model = Sequential()
model.add(Embedding(max_words, 128, input_length=max_len))
model.add(SpatialDropout1D(0.3))
model.add(Bidirectional(LSTM(100, dropout=0.3, recurrent_dropout=0.3)))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



In [11]:
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

batch_size = 32
epochs = 10
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
          validation_data=(X_test, y_test), callbacks=[early_stopping])

Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m211s[0m 255ms/step - accuracy: 0.6684 - loss: 0.5835 - val_accuracy: 0.8226 - val_loss: 0.4043
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m198s[0m 249ms/step - accuracy: 0.8337 - loss: 0.3926 - val_accuracy: 0.8280 - val_loss: 0.3984
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 252ms/step - accuracy: 0.8780 - loss: 0.3058 - val_accuracy: 0.8599 - val_loss: 0.3390
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 246ms/step - accuracy: 0.9052 - loss: 0.2472 - val_accuracy: 0.8589 - val_loss: 0.3423
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m194s[0m 249ms/step - accuracy: 0.9228 - loss: 0.2031 - val_accuracy: 0.8568 - val_loss: 0.3611


<keras.src.callbacks.history.History at 0x1893fd5f980>

In [12]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy*100:.2f}%')

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 36ms/step - accuracy: 0.8597 - loss: 0.3421
Accuracy: 85.99%


In [27]:
new_texts = ["This movie is great!", "The plot is confusing.", "Amazing film with brilliant performances."," It was a horrible movie.","it could have been great if it was shorter.","I would have watched it if it was shorter"]

# Tokenize and pad the new texts
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(new_texts)
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_padded = pad_sequences(new_sequences, maxlen=max_len)

# Make predictions
predictions = model.predict(new_padded)

# Display predictions
for i, text in enumerate(new_texts):
    sentiment = "Positive" if predictions[i] > 0.5 else "Negative"
    print(f"Text: '{text}'\nPredicted Sentiment: {sentiment}\n")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
Text: 'This movie is great!'
Predicted Sentiment: Positive

Text: 'The plot is confusing.'
Predicted Sentiment: Positive

Text: 'Amazing film with brilliant performances.'
Predicted Sentiment: Negative

Text: ' It was a horrible movie.'
Predicted Sentiment: Positive

Text: 'it could have been great if it was shorter.'
Predicted Sentiment: Positive

Text: 'I would have watched it if it was shorter'
Predicted Sentiment: Positive

