In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Dataset configuration
vocab_size = 80000  # keep only the 80,000 most frequent words
maxlen = 200        # every review is cut or padded to exactly this many tokens

# Fetch the IMDb reviews, already encoded as lists of integer word ids
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# Bring both splits to a common length: zeros are appended to short reviews
# and long reviews lose their tail (both operations act on the end)
_pad_kwargs = dict(maxlen=maxlen, padding='post', truncating='post')
x_train = pad_sequences(x_train, **_pad_kwargs)
x_test = pad_sequences(x_test, **_pad_kwargs)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Build the model: token ids -> 64-d embeddings -> LSTM -> sigmoid probability.
model = Sequential([
    # The input sequences are post-padded with 0. mask_zero=True makes the
    # embedding emit a mask so the LSTM skips those padding steps instead of
    # processing a long run of zeros after the real tokens, which otherwise
    # washes out its final state and makes training unstable.
    Embedding(input_dim=vocab_size, output_dim=64, mask_zero=True),
    LSTM(128),
    # Single sigmoid unit: output is the predicted probability of the positive class.
    Dense(1, activation='sigmoid')
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the tracked metric
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit for 5 epochs in mini-batches of 64, holding out 10% of the training data for validation.
# The returned History object is kept so the per-epoch metrics can be inspected later.
history = model.fit(x_train, y_train, batch_size=64, epochs=5, validation_split=0.1)

Epoch 1/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 12ms/step - accuracy: 0.5043 - loss: 0.6934 - val_accuracy: 0.5264 - val_loss: 0.6951
Epoch 2/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.5481 - loss: 0.6812 - val_accuracy: 0.7616 - val_loss: 0.5463
Epoch 3/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.6120 - loss: 0.6372 - val_accuracy: 0.5328 - val_loss: 0.6903
Epoch 4/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.5697 - loss: 0.6694 - val_accuracy: 0.5304 - val_loss: 0.6881
Epoch 5/5
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - accuracy: 0.6264 - loss: 0.6168 - val_accuracy: 0.8264 - val_loss: 0.4342


In [None]:
# Evaluate the model on the test set.
# evaluate() returns the loss first, followed by the compiled metrics (here: accuracy),
# so each value is reported under its own label.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.8184 - loss: 0.4404
Test Accuracy: 0.443551242351532


In [None]:
# Show the padded test sequences, a blank line, then the matching labels
print(x_test, end="\n\n")
print(y_test)

[[   1  591  202 ...    0    0    0]
 [   1   14   22 ... 2033   19 7836]
 [   1  111  748 ...  655 2212    5]
 ...
 [   1   13 1408 ...    0    0    0]
 [   1   11  119 ...    0    0    0]
 [   1    6   52 ...    0    0    0]]

[0 1 1 ... 0 0 0]


In [None]:
import re  # stdlib; used only to split the raw review text into word tokens

new_review = "I was thoroughly disappointed by this film. It was a complete waste of time."

# imdb.load_data() with its default arguments marks the start of every review
# with id 1, replaces out-of-vocabulary words with id 2, and shifts every real
# word rank up by 3 (id 0 is left for padding). get_word_index() returns the
# unshifted ranks, so the same convention must be re-applied here; otherwise
# the model receives ids that mean different words than during training.
START_ID, OOV_ID, INDEX_OFFSET = 1, 2, 3

word_index = imdb.get_word_index()


def encode_review(text):
    """Convert raw review text into the integer ids used by the training data.

    The text is lower-cased and split into runs of letters, digits and
    apostrophes, so trailing punctuation ("film.") no longer prevents a
    vocabulary match. Words that are unknown, or whose shifted id falls
    outside ``vocab_size``, become ``OOV_ID``. The result always begins with
    ``START_ID``.
    """
    ids = [START_ID]
    for token in re.findall(r"[a-z0-9']+", text.lower()):
        rank = word_index.get(token)
        shifted = OOV_ID if rank is None else rank + INDEX_OFFSET
        # Mirror num_words=vocab_size: ids beyond the kept vocabulary are OOV.
        ids.append(shifted if shifted < vocab_size else OOV_ID)
    return ids


# Tokenize and preprocess the new review exactly like the training data
sequence = [encode_review(new_review)]
padded_sequence = pad_sequences(sequence, maxlen=maxlen, padding='post', truncating='post')

# Predict the sentiment; the model outputs one probability per input row,
# so take the single scalar out of the (1, 1) result before thresholding.
prediction = model.predict(padded_sequence)
positive_probability = float(prediction[0][0])
sentiment = "Positive" if positive_probability > 0.5 else "Negative"
print(f"The sentiment of the review is: {sentiment}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
The sentiment of the review is: Negative
