In [2]:
import numpy as np
from keras.datasets import imdb
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense

# Set hyperparameters
max_features = 10000
max_len = 100
embedding_dim = 100
hidden_units = 32

# Load the IMDB dataset
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pad sequences
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

# Define the model architecture
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=max_len))
model.add(SimpleRNN(hidden_units))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model
score, acc = model.evaluate(x_test, y_test, batch_size=32)
print('Test accuracy:', acc)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.7410799860954285


In [13]:
# Example prediction for 2 positive reviews
positive_reviews = ['The acting was amazing and the storyline was very compelling.',
                    'This movie is one of the badest I have seen in a long time.']
positive_sequences = [np.array([imdb.get_word_index()[word] if imdb.get_word_index().get(word) is not None and imdb.get_word_index()[word] < max_features else 0 for word in review.split()]) for review in positive_reviews]
positive_sequences = pad_sequences(positive_sequences, maxlen=max_len)
print(model.predict(positive_sequences))

[[0.99985325]
 [0.09049376]]
