In [1]:
# Import resources
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
import numpy as np

Using TensorFlow backend.


###1. Get the dataset

In [2]:
# Download data from keras.datasets, limit of 5000 words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=5000)

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [3]:
# Download indexed review words
words = imdb.get_word_index()
# Format indexed words
words_index = {index: word for word, index in words.items()}

Downloading data from https://s3.amazonaws.com/text-datasets/imdb_word_index.json


###2. Preprocessing the Data

In [0]:
# Prepare data for model by padding reviews and set max length to 500
X_train = sequence.pad_sequences(X_train, maxlen=500)
X_test = sequence.pad_sequences(X_test, maxlen=500)

In [0]:
# Split data into validation set
X_validation = X_train[:200]
y_validation = y_train[:200]
X_train1 = X_train[200:]
y_train1 = y_train[200:]

###3. Build the Model

In [6]:
# Build the model
model = Sequential()
model.add(Embedding(5000, 32, input_length=500))
model.add(LSTM(128))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


###4. Train the model

In [7]:
# Fit model
model.fit(X_train, y_train, validation_split=0.01, batch_size=125, epochs=5, verbose=1)




Train on 24750 samples, validate on 250 samples
Epoch 1/5





Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f53289ed198>

###5. Test the Model

In [8]:
# Evaluate accuracy
model_accuracy = model.evaluate(X_test, y_test, verbose=1)

# View accuracy evaluation
print(f'Model accuracy: {model_accuracy[1]*100}')

Model accuracy: 86.74


###6. Predict Something

In [59]:
# Select a random integer
num = np.random.randint(low=0, high=100, size=1)

# Make a prediction randomly chosen
pred = model.predict(X_test[[num]])

if (pred < .5) and (y_test[num] == 0):
  print(f'Negative review, predictive score: {pred}, actual:{y_test[num]}')
elif (pred >= .5) and (y_test[num] == 1):
  print(f'Positive review, predictive score: {pred}, actual:{y_test[num]}')
else:
  print(f'The prediction is incorrect')

Positive review, predictive score: [[0.55510193]], actual:[1]


  after removing the cwd from sys.path.
