In [1]:
from keras.layers import Dense, LSTM, Flatten, Embedding
from keras.models import Model, Sequential
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import EarlyStopping

import numpy as np
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
# 获取数据
max_word = 10000
max_length = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words = max_word)

x_train = pad_sequences(x_train, maxlen = max_length , padding = "post", truncating = "post", value = 0, dtype = "int32")
x_test = pad_sequences(x_test, maxlen = max_length, padding = "post", truncating = "post", value = 0, dtype = "int32")

In [3]:
# 建立模型
embed_size = 32

model = Sequential()

model.add(Embedding(input_dim = max_word, output_dim = embed_size, input_length = max_length, embeddings_initializer = "glorot_uniform", name = "Embedding_1"))
model.add(LSTM(units = 32, activation = "tanh", return_sequences = False, unroll = False, name = "LSTM_1"))
model.add(Dense(units = 16, activation = "relu", name = "Dense_1"))
model.add(Dense(units = 1, activation = "sigmoid", name = "Dense_2"))

model. summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Embedding_1 (Embedding)      (None, 500, 32)           320000    
_________________________________________________________________
LSTM_1 (LSTM)                (None, 32)                8320      
_________________________________________________________________
Dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
Dense_2 (Dense)              (None, 1)                 17        
Total params: 328,865
Trainable params: 328,865
Non-trainable params: 0
_________________________________________________________________


In [4]:
# 编译模型
model.compile(optimizer = "rmsprop", loss = "binary_crossentropy", metrics = ["acc"])

# 训练模型
earlyStopping = EarlyStopping(monitor = "val_loss", patience = 5)
model.fit(x_train, y_train, batch_size = 128, epochs = 20, validation_split = 0.3, verbose = 1)

Instructions for updating:
Use tf.cast instead.
Train on 17500 samples, validate on 7500 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x152cce91c18>

In [5]:
# 测试模型
model.evaluate(x_train, y_train, verbose = 1)



[0.4638665198135376, 0.78147999999999995]

### 分析：
    LSTM和SimpleRNN相比，在500step下依然能保持不梯度消失/爆炸，可见LSTM可以在一定程度上避免梯度消失/爆炸问题。
    LSTM通过Ct来保存Cell状态，而Ct-1和Ct之间是通过加法来处理的，这样可以保证在较长的时间步中，初期的状态也能对当前状态产生一定影响，而SimpleRNN通过乘法处理，在较长的时间步中，可能会导致初期的状态由于权重w的指数级相乘而趋于0或者过大。