In [1]:
from keras.datasets import imdb

# Load the IMDB sentiment dataset as integer-encoded reviews with 0/1 labels.
# Per the Keras docs, num_words = 1000 keeps only word indexes below 1000 and
# every other word is replaced by oov_char. With oov_char = 0, out-of-vocabulary
# words share the value that pad_sequences uses for padding by default.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words = 1000, oov_char = 0)

In [14]:
# Word -> integer index lookup for the IMDB vocabulary, used to encode raw text at inference time.
# NOTE(review): these indexes presumably do not include the reserved offset of 3
# that load_data applies (process_review adds it manually) - confirm against the Keras docs.
word_index = imdb.get_word_index()

In [3]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Every review is padded or truncated to exactly this many tokens so that
# the model receives fixed-length input.
max_sequence_length = 512

# Apply the same padding to both splits in one pass.
x_train, x_test = (
  pad_sequences(split, maxlen = max_sequence_length)
  for split in (x_train, x_test)
)

In [4]:
from keras.models import Sequential
from keras.layers import Input, Embedding, GRU, Dense

# Embedding vocabulary size; kept equal to num_words = 1000 used when loading the data
input_dim = 1000
# Length of the vector learned for each token
output_dim = 128

# Binary sentiment classifier: token ids -> embeddings -> GRU -> single sigmoid unit
model = Sequential()
model.add(Input(shape = (max_sequence_length, )))
model.add(Embedding(input_dim = input_dim, output_dim = output_dim))
model.add(GRU(32))
model.add(Dense(1, activation = "sigmoid"))

model.compile(loss = "binary_crossentropy", optimizer = "adam", metrics = ["accuracy"])

In [5]:
# Train on the padded training split; the return value of fit is kept in model_output.
# NOTE(review): no validation data is passed, so the log below only reports training
# metrics. Training loss reaches 0.1767 by epoch 8 while the test loss reported further
# down is about 0.65 - consider validation_split and early stopping to limit overfitting.
model_output = model.fit(x_train, y_train, batch_size = 32, epochs = 20)

Epoch 1/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m186s[0m 233ms/step - accuracy: 0.6753 - loss: 0.5727
Epoch 2/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m178s[0m 202ms/step - accuracy: 0.8602 - loss: 0.3319
Epoch 3/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 196ms/step - accuracy: 0.8898 - loss: 0.2689
Epoch 4/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m149s[0m 191ms/step - accuracy: 0.9029 - loss: 0.2456
Epoch 5/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 189ms/step - accuracy: 0.9109 - loss: 0.2294
Epoch 6/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 188ms/step - accuracy: 0.9180 - loss: 0.2051
Epoch 7/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 198ms/step - accuracy: 0.9279 - loss: 0.1891
Epoch 8/20
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m148s[0m 189ms/step - accuracy: 0.9333 - loss: 0.1767
Epoch 9/

In [6]:
# Score the trained model on the held-out test split; evaluate returns the loss
# followed by the metrics configured in compile (here: accuracy).
loss, accuracy = model.evaluate(x = x_test, y = y_test)

# Display both values as the cell result
(loss, accuracy)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 68ms/step - accuracy: 0.8547 - loss: 0.6666


(0.6493476033210754, 0.8553599715232849)

In [7]:
def process_review(review, num_words = 1000):
  """Encode a raw review string the same way the IMDB training data is encoded.

  The text is lower-cased and split on whitespace, each word is looked up in
  the module-level `word_index`, and the resulting token ids are padded to
  `max_sequence_length` with `pad_sequences`.

  Args:
    review: Raw review text.
    num_words: Vocabulary size the model was trained with. Must match the
      `num_words` passed to `imdb.load_data` and the Embedding `input_dim`.

  Returns:
    The padded token-id array produced by `pad_sequences` for this one review.
  """
  start_char = 1  # load_data prepends this marker to every training review by default
  oov_char = 0    # same value as the oov_char = 0 passed to imdb.load_data
  index_from = 3  # the first 3 indexes are reserved in the imdb dataset:
                  # "the" is the most frequent word, but its actual index is 4

  encoded = [start_char]
  for word in review.lower().split():
    rank = word_index.get(word)
    # Words missing from the vocabulary become the OOV id instead of a
    # non-numeric placeholder, which made the range check below raise TypeError.
    token = oov_char if rank is None else rank + index_from
    # Only ids strictly below num_words were kept during training, and the
    # Embedding layer only accepts ids in [0, num_words), so the bound is exclusive.
    encoded.append(token if token < num_words else oov_char)

  return pad_sequences([encoded], maxlen = max_sequence_length)

In [16]:
def predict_sentiment(review):
  """Classify a raw review string as "Positive" or "Negative".

  The text is encoded with `process_review` and scored by the module-level
  `model`; a sigmoid output above 0.5 is reported as positive.
  """
  encoded_review = process_review(review)
  scores = model.predict(encoded_review)

  # Only one review is scored per call, so the first row holds its score
  if scores[0] > 0.5:
    return "Positive"
  return "Negative"

In [17]:
# A few hand-written reviews for a quick sanity check of the trained model
sample_data = [
  "The food was fantastic",
  "The movie was terrible",
  "I love this movie it is a great one"
]

# Classify each sample independently, keeping the labels in input order
predictions = list(map(predict_sentiment, sample_data))

# Report every label next to the text it belongs to
for text, label in zip(sample_data, predictions):
  print(f"{label} Review: {text}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
Negative Review: The food was fantastic
Negative Review: The movie was terrible
Positive Review: I love this movie it is a great one
