In [9]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Load IMDB dataset
max_words = 10000   # vocab size
max_len = 200       # max review length

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

# Pad sequences to same length
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

# Build LSTM model
model = Sequential()
model.add(Embedding(max_words, 128, input_length=max_len))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train
model.fit(X_train, y_train, batch_size=64, epochs=3, validation_data=(X_test, y_test))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/3




[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m300s[0m 758ms/step - accuracy: 0.7027 - loss: 0.5533 - val_accuracy: 0.8399 - val_loss: 0.3787
Epoch 2/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 760ms/step - accuracy: 0.8369 - loss: 0.3762 - val_accuracy: 0.8538 - val_loss: 0.3500
Epoch 3/3
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m358s[0m 853ms/step - accuracy: 0.8882 - loss: 0.2862 - val_accuracy: 0.8385 - val_loss: 0.3962


<keras.src.callbacks.history.History at 0x7db5e03ccd70>

In [13]:
test_texts = [
    "I absolutely loved this movie, it was fantastic!",
    "The product broke after one day of use.",
    "The movie was okay, nothing special but not terrible either."
]

test_seq = tokenizer.texts_to_sequences(test_texts)
test_pad = pad_sequences(test_seq, maxlen=max_len)
predictions = model.predict(test_pad)

for text, pred in zip(test_texts, predictions):
    sentiment = "Positive" if pred > 0.65 else "Negative"
    print(f"{text} --> {sentiment} ({pred[0]:.4f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step
I absolutely loved this movie, it was fantastic! --> Positive (0.9028)
The product broke after one day of use. --> Negative (0.5160)
The movie was okay, nothing special but not terrible either. --> Negative (0.1406)


In [11]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.datasets import imdb

word_index = imdb.get_word_index()


word_index = {k:(v+3) for k,v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2
word_index["<UNUSED>"] = 3


tokenizer = Tokenizer(num_words=max_words, oov_token="<UNK>")

tokenizer.word_index = word_index



Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [14]:
test_texts = [
    "I absolutely loved this movie, it was fantastic!",
    "The product broke after one day of use.",
    "The movie was okay, nothing special but not terrible either."
]

test_seq = tokenizer.texts_to_sequences(test_texts)
test_pad = pad_sequences(test_seq, maxlen=max_len)
predictions = model.predict(test_pad)

for text, pred in zip(test_texts, predictions):
    sentiment = "Positive" if pred > 0.65 else "Negative"
    print(f"{text} --> {sentiment} ({pred[0]:.4f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
I absolutely loved this movie, it was fantastic! --> Positive (0.9028)
The product broke after one day of use. --> Negative (0.5160)
The movie was okay, nothing special but not terrible either. --> Negative (0.1406)
