In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Embedding, SimpleRNN

from tensorflow.keras.datasets import imdb
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing import sequence

import matplotlib.pyplot as plt

import numpy as np

In [2]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000, maxlen=200)

In [3]:
print(len(x_train))
print(len(x_test))

25000
3913


In [4]:
print(x_train[0])
print(len(x_train[0]))

[1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5, 647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 2, 4, 1153, 9, 194, 775, 7, 8255, 2, 349, 2637, 148, 605, 2, 8003, 15, 123, 125, 68, 2, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 2, 1157, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 2, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 2, 32, 7464, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]
189


In [5]:
review = x_train[1]

In [6]:
vocab = imdb.get_word_index()

In [9]:
len(vocab)

88584

In [10]:
inv_vocab = dict(zip(vocab.values(), vocab.keys()))

In [11]:
len(inv_vocab)

88584

In [12]:
inv_vocab[1]

'the'

In [15]:
[inv_vocab[index] for index in inv_vocab if index <= 3]

['a', 'the', 'and']

In [18]:
" ".join([inv_vocab[index-3] for index in review if index >= 3])

"this has to be one of the worst films of the 1990s when my friends i were watching this film being the target audience it was aimed at we just sat watched the first half an hour with our jaws touching the floor at how bad it really was the rest of the time everyone else in the theatre just started talking to each other leaving or generally crying into their popcorn that they actually paid money they had working to watch this feeble excuse for a film it must have looked like a great idea on paper but on film it looks like no one in the film has a clue what is going on crap acting crap costumes i can't get across how this is to watch save yourself an hour a bit of your life"

In [19]:
x_train_padded = sequence.pad_sequences(x_train, maxlen=200)
x_test_padded = sequence.pad_sequences(x_test, maxlen=200)


In [20]:
x_train_padded.shape

(25000, 200)

In [21]:
len(x_train_padded[1])

200

In [22]:
# x_train_padded[1]

In [23]:
y_train

array([0, 0, 0, ..., 1, 0, 0])

In [141]:
in_layer = Input(shape=(200,))
embedding = Embedding(input_dim=10000, output_dim=100)(in_layer)
rnn_layer = SimpleRNN(units=60)(embedding)
out_layer = Dense(1, activation="sigmoid")(rnn_layer)

In [142]:
model = Model(inputs=in_layer, outputs=out_layer)


In [143]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 200)]             0         
_________________________________________________________________
embedding (Embedding)        (None, 200, 100)          1000000   
_________________________________________________________________
simple_rnn (SimpleRNN)       (None, 60)                9660      
_________________________________________________________________
dense (Dense)                (None, 1)                 61        
Total params: 1,009,721
Trainable params: 1,009,721
Non-trainable params: 0
_________________________________________________________________


In [144]:
model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])


In [145]:
model.fit(x_train_padded, y_train, batch_size=1000, epochs=10, steps_per_epoch=10)

Train on 25000 samples
Epoch 1/10

<tensorflow.python.keras.callbacks.History at 0x65e60af60>

In [146]:
model.evaluate(x_test_padded, y_test, verbose=0)

[0.3496013595385835, 0.8596984]