In [None]:
from __future__ import print_function

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM
from keras.datasets import imdb

import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Vocabulary cap passed to imdb.load_data as num_words.
MAX_FEATURES = 20000
# Fixed sequence length handed to pad_sequences as maxlen.
# NOTE: pad_sequences is called with its default padding/truncating modes; per the
# Keras docs both default to 'pre', so reviews longer than this keep their LAST
# MAX_SENTENCE_LENGTH tokens and shorter ones are zero-padded at the front.
MAX_SENTENCE_LENGTH = 80

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=MAX_FEATURES)
for split, split_label in ((x_train, 'train sequences'), (x_test, 'test sequences')):
    print(len(split), split_label)

# Show the first training review as loaded (a variable-length list of word indices).
print("\n\nExample one before our preprocessing")
print(x_train[0])

# Bring both splits to a fixed-width 2-D array of shape (samples, MAX_SENTENCE_LENGTH).
print('\n\nPad sequences (samples x time)')
x_train, x_test = [
    sequence.pad_sequences(raw_split, maxlen=MAX_SENTENCE_LENGTH)
    for raw_split in (x_train, x_test)
]
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

# Show the same review again, now padded/truncated to the fixed length.
print("\n\nExample one after our preprocessing")
print(x_train[0])

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
25000 train sequences
25000 test sequences


Example one before our preprocessing
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 19193, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 10311, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 12118, 1029, 13, 104, 88, 4, 381, 15, 297, 98,

In [None]:
# Hyperparameters for the sentiment classifier.
EMBEDDING_SIZE = 128      # dimensionality of each word vector
HIDDEN_LAYER_SIZE = 128   # number of LSTM units
BATCH_SIZE = 32           # used by both model.fit and model.evaluate
NUM_EPOCHS = 10

print('Build model...')
model = Sequential()
# `input_length` is intentionally not passed: Keras 3 deprecates it (it only
# triggers a warning and is ignored). The sequence length is supplied through
# model.build() below instead.
model.add(Embedding(MAX_FEATURES, EMBEDDING_SIZE))
model.add(LSTM(HIDDEN_LAYER_SIZE, dropout=0.2, recurrent_dropout=0.2))
# Single sigmoid unit -> probability for the binary sentiment label.
model.add(Dense(1, activation='sigmoid'))

# Build explicitly so the weights exist and model.summary() can report real
# output shapes and parameter counts rather than an unbuilt model.
model.build(input_shape=(None, MAX_SENTENCE_LENGTH))

# summary of our model.
model.summary()

# Compile the model.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

Build model...




In [None]:
print('Train...')
# Training options collected in one place.
# NOTE(review): the test split is used as validation data here and is evaluated
# again in the next cell, so the val_* metrics are not an independent estimate.
fit_options = {
    'batch_size': BATCH_SIZE,
    'epochs': NUM_EPOCHS,
    'validation_data': (x_test, y_test),
}
# `history` is kept because the plotting cell reads per-epoch metrics from it.
history = model.fit(x_train, y_train, **fit_options)

Train...
Epoch 1/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 222ms/step - accuracy: 0.7141 - loss: 0.5386 - val_accuracy: 0.8376 - val_loss: 0.3702
Epoch 2/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 214ms/step - accuracy: 0.8788 - loss: 0.3012 - val_accuracy: 0.8364 - val_loss: 0.3730
Epoch 3/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m224s[0m 243ms/step - accuracy: 0.9199 - loss: 0.2123 - val_accuracy: 0.8326 - val_loss: 0.4168
Epoch 4/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 213ms/step - accuracy: 0.9411 - loss: 0.1561 - val_accuracy: 0.8304 - val_loss: 0.4870
Epoch 5/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 213ms/step - accuracy: 0.9618 - loss: 0.1066 - val_accuracy: 0.8258 - val_loss: 0.5341
Epoch 6/10
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 214ms/step - accuracy: 0.9726 - loss: 0.0788 - val_accuracy: 0.8210 - val_loss: 0.

In [None]:
# Evaluate on the test split. With the single 'accuracy' metric configured at
# compile time, evaluate() yields two values, unpacked as (loss, accuracy).
evaluation = model.evaluate(x_test, y_test, batch_size=BATCH_SIZE)
score, acc = evaluation
for caption, value in (('Test score:', score), ('Test accuracy:', acc)):
    print(caption, value)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 47ms/step - accuracy: 0.8116 - loss: 0.8851
Test score: 0.8725560903549194
Test accuracy: 0.810479998588562


In [None]:
# Two stacked panels (accuracy on top, loss below), each comparing the
# per-epoch training curve with the validation curve recorded by model.fit.
fig, (accuracy_ax, loss_ax) = plt.subplots(2, 1)
panels = (
    (accuracy_ax, "Accuracy", "accuracy", "val_accuracy"),
    (loss_ax, "Loss", "loss", "val_loss"),
)
for ax, panel_title, train_key, val_key in panels:
    ax.set_title(panel_title)
    ax.plot(history.history[train_key], color="g", label="Train")
    ax.plot(history.history[val_key], color="b", label="Validation")
    ax.legend(loc="best")
# Keep the panel titles from overlapping the neighbouring axes.
fig.tight_layout()
plt.show()