In [1]:
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import models, layers
import random

In [3]:
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=10000)

X_train = pad_sequences(X_train, maxlen=200)
X_test = pad_sequences(X_test, maxlen=200)

X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

In [4]:
def build_model():
    model = models.Sequential()
    model.add(layers.Embedding(input_dim=10000,output_dim=128))
    model.add(layers.LSTM(64, dropout=0.3, recurrent_dropout=0.3))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = build_model()

In [5]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 27ms/step - accuracy: 0.6930 - loss: 0.5670 - val_accuracy: 0.7090 - val_loss: 0.5511
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 27ms/step - accuracy: 0.8215 - loss: 0.4063 - val_accuracy: 0.8178 - val_loss: 0.4204
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 27ms/step - accuracy: 0.8751 - loss: 0.3088 - val_accuracy: 0.7426 - val_loss: 0.5083
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 27ms/step - accuracy: 0.8639 - loss: 0.3223 - val_accuracy: 0.8492 - val_loss: 0.3940
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 27ms/step - accuracy: 0.9063 - loss: 0.2417 - val_accuracy: 0.8280 - val_loss: 0.4121
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 27ms/step - accuracy: 0.9166 - loss: 0.2160 - val_accuracy: 0.8458 - val_loss: 0.4199
Epoch 7/10
[1m6

<keras.src.callbacks.history.History at 0x1ccbadf8500>

In [6]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Testing Accuracy: {accuracy * 100:.2f}%")

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.8361 - loss: 0.5509
Testing Accuracy: 83.79%


In [9]:
word_index = imdb.get_word_index()
reverse_word_index = {value:key for (key,value) in word_index.items()}

def decode_review(encoded_review):
    return ' '.join([reverse_word_index.get(i-3,'?') for i in encoded_review])

sample_indices = random.sample(range(len(X_test)),10)

for index in sample_indices:
    review = decode_review(X_test[index])
    preprocessed_review = pad_sequences([[word_index.get(word,0) for word in review.split()]], maxlen=200)
    prediction = model.predict(preprocessed_review)[0][0]
    actual_sentiment = y_test[index]

    print(f'Review: \n\"{review}\"\n'
          f'Predicted Sentiment: {"Positive" if prediction > 0.5 else "Negative"}\n'
          f'Actual Sentiment: {"Positive" if actual_sentiment==1 else "Negative"}\n'
          )

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
Review: 
"? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? while the twilight zone was a wonderful show it was also very uneven with some great episodes some lousy ones and many in between don't believe the die hard fans there were some ? and this was definitely one of them br br in a plot that is obviously meant to be an attack on ? castro a near ? peter falk in lots of makeup and a beard ? a magic mirror that allows him to realize who all his enemies are so he can them while i do believe that castro is a thug and dictator and ? of thousands of ? and political prisoners will ? to this it's amazing how this sort of preachy episode actually makes audiences laugh at the american efforts to ? the creep and actually makes castro seem okay think about it ? and company wanted to hurt castro but instead only seemed to be obvious preachy and silly in the process br br it's indeed bad almost laughably bad when see