In [1]:
import tensorflow as tf

In [2]:
from tensorflow.keras.datasets import imdb

In [3]:
import numpy as np


In [4]:
num_words = 10000 # need only top commonly ocurring 10k

In [5]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=num_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [6]:
max_len = 20 # we'll pad sequences to a length of 20

In [7]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [8]:
#step 2: prerpocess the data

In [9]:
#pad the sequences to ensure all sequences have the same length

In [10]:
x_train = pad_sequences(x_train, maxlen=max_len)

In [11]:
x_test = pad_sequences(x_test, maxlen=max_len)

In [12]:
x_train.shape

(25000, 20)

In [13]:
# step 3: build the LSTM Model

In [16]:
model = tf.keras.Sequential([tf.keras.layers.Embedding(input_dim=num_words, output_dim=128, input_length=max_len), tf.keras.layers.LSTM(64, activation='relu', dropout=0.2), tf.keras.layers.Dense(1, activation='sigmoid')])



In [17]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [18]:
model.summary()

In [19]:
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_data=(x_test, y_test))

Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 55ms/step - accuracy: 0.6475 - loss: 0.6173 - val_accuracy: 0.7665 - val_loss: 0.4763
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 53ms/step - accuracy: 0.8126 - loss: 0.4048 - val_accuracy: 0.7676 - val_loss: 0.4737
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 54ms/step - accuracy: 0.8522 - loss: 0.3328 - val_accuracy: 0.7575 - val_loss: 0.5122
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 51ms/step - accuracy: 0.8819 - loss: 0.2730 - val_accuracy: 0.7558 - val_loss: 0.6910
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 53ms/step - accuracy: 0.8995 - loss: 0.2209 - val_accuracy: 0.7520 - val_loss: 0.7023


<keras.src.callbacks.history.History at 0x7e2ce92f5590>

In [20]:
#step 6: evaluate the model

In [21]:
loss, accuracy = model.evaluate(x_test, y_test)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.7505 - loss: 0.7089


In [22]:
print(f'Test Loss: {loss:.4f}')

Test Loss: 0.7023


In [23]:
print(f'Test accuracy: {accuracy:.4f}')

Test accuracy: 0.7520


In [24]:
# Step 7: Make predictions
def predict_sentiment(review):
    # Tokenize the review using the same tokenization used by IMDB dataset
    review_seq = imdb.get_word_index()
    review_tokens = [review_seq.get(word, 0) for word in review.lower().split() if word in review_seq]
    review_tokens = pad_sequences([review_tokens], maxlen=max_len)

    # Make the prediction
    prediction = model.predict(review_tokens)
    sentiment = "Positive" if prediction > 0.5 else "Negative"
    return sentiment

In [25]:
# Example of making predictions
review = "This movie was fantastic! I loved the plot and the acting was amazing."
print(f"Review Sentiment: {predict_sentiment(review)}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
Review Sentiment: Positive


In [26]:
# Example of making predictions
review1 = "This movie could have been better! the plot seemed pale and flow was monotonous."
print(f"Review Sentiment: {predict_sentiment(review1)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Review Sentiment: Negative
