In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers, Sequential

In [2]:
# Notebook-wide hyperparameters.
batchsz = 128          # mini-batch size used for the train/val/test pipelines
split = 0.2            # fraction of the training set held out for validation
total_words = 10000    # vocabulary size: passed as num_words to load_data and to the Embedding layer
max_review_len = 250   # every review is padded/truncated to this many tokens
embedding_len = 64     # dimensionality of each word embedding vector

In [3]:
# Load the IMDB reviews, restricting the vocabulary to `total_words` entries.
train_split, test_split = keras.datasets.imdb.load_data(num_words=total_words)
x_train, y_train = train_split
x_test, y_test = test_split

  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [4]:
# Token -> id lookup table. Every id from the raw index is shifted by 3
# so that ids 0-3 are free for the reserved markers added below.
raw_index = keras.datasets.imdb.get_word_index()
word_index = {token: rank + 3 for token, rank in raw_index.items()}
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})
# Inverted table (id -> token), used to decode reviews back into text.
reverse_word_index = dict((idx, token) for token, idx in word_index.items())

In [5]:
def decode_review(text):
    """Convert an iterable of token ids back into a space-separated string.

    Each id is looked up in the module-level ``reverse_word_index``; ids
    that are not present in that table are rendered as ``'?'``.
    """
    words = []
    for token_id in text:
        words.append(reverse_word_index.get(token_id, '?'))
    return ' '.join(words)

In [6]:
# Decode one training review (index 8) back into text to inspect it.
decode_review(x_train[8])

"<START> just got out and cannot believe what a brilliant documentary this is rarely do you walk out of a movie theater in such awe and <UNK> lately movies have become so over hyped that the thrill of discovering something truly special and unique rarely happens <UNK> <UNK> did this to me when it first came out and this movie is doing to me now i didn't know a thing about this before going into it and what a surprise if you hear the concept you might get the feeling that this is one of those <UNK> movies about an amazing triumph covered with over the top music and trying to have us fully convinced of what a great story it is telling but then not letting us in <UNK> this is not that movie the people tell the story this does such a good job of capturing every moment of their involvement while we enter their world and feel every second with them there is so much beyond the climb that makes everything they go through so much more tense touching the void was also a great doc about mountain 

In [7]:
# Pad/truncate every review to exactly `max_review_len` tokens.
pad_x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=max_review_len)
pad_x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=max_review_len)

# Split into train/validation BEFORE shuffling. Shuffling first and then
# calling take()/skip() is a leak: the shuffle is re-drawn on every
# iteration (reshuffle_each_iteration defaults to True), so the two
# subsets would overlap and validation samples would end up in training.
num_train = int(len(pad_x_train) * (1 - split))
db_data = tf.data.Dataset.from_tensor_slices((pad_x_train, y_train))
db_train = db_data.take(num_train)
db_val = db_data.skip(num_train)

# Only the training subset is shuffled (reshuffled every epoch).
db_train = db_train.shuffle(1000).batch(batchsz, drop_remainder=True)
# Keep the final partial batch for validation/test so that every sample
# contributes to the reported metrics.
db_val = db_val.batch(batchsz)
db_test = tf.data.Dataset.from_tensor_slices((pad_x_test, y_test))
db_test = db_test.batch(batchsz)

In [8]:
class MyRNN(keras.Model):
    """Bidirectional-LSTM binary classifier over padded token-id sequences.

    Layers, in order: Embedding -> Bidirectional(LSTM) -> Dense(32, relu)
    -> Dense(1, sigmoid).

    Args:
        units: Number of LSTM units in each direction.
    """

    def __init__(self, units):
        super().__init__()

        # Token ids -> dense vectors of size `embedding_len`.
        self.embedding = Sequential([
            layers.Embedding(total_words,embedding_len,
                             input_length=max_review_len)
        ])
        # Sequence encoder; the LSTM applies input dropout while training.
        self.rnn = Sequential([
            layers.Bidirectional(layers.LSTM(units,dropout=0.3))
        ])

        # Classification head ending in a single sigmoid unit.
        self.outlayer = Sequential([
            layers.Dense(32,activation='relu'),
            layers.Dense(1,activation='sigmoid')
        ])

    def call(self, inputs, training=None):
        """Run the forward pass.

        Args:
            inputs: Batch of padded token-id sequences.
            training: Whether the call is in training mode (enables the
                LSTM dropout). ``None`` lets Keras decide from context.

        Returns:
            The sigmoid output of the final Dense(1) layer.
        """
        x = self.embedding(inputs)
        # `training` is forwarded by keyword: newer Keras versions accept only
        # input tensors as positional arguments, and the LSTM needs the flag
        # so its dropout is active during training only.
        x = self.rnn(x, training=training)
        return self.outlayer(x, training=training)

In [9]:
from tensorflow.keras.callbacks import TensorBoard
# TensorBoard callback configured with "./log" as its log directory.
tbCallBack = TensorBoard(log_dir="./log")

In [10]:
# Hyperparameters for this training run.
units = 32
epochs = 5

# Instantiate the network and configure optimizer, loss and metrics.
model = MyRNN(units)
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# Build with an unspecified batch dimension, then print the layer summary.
model.build(input_shape=(None, max_review_len))
model.summary()

Model: "my_rnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, 250, 64)           640000    
_________________________________________________________________
sequential_1 (Sequential)    (None, 64)                24832     
_________________________________________________________________
sequential_2 (Sequential)    (None, 1)                 2113      
Total params: 666,945
Trainable params: 666,945
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Train the model, validating after each epoch. `callbacks` is documented as
# a list of callback instances, so the single callback is wrapped in one.
history = model.fit(db_train, epochs=epochs, validation_data=db_val, callbacks=[tbCallBack])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
# Evaluate on the test pipeline; the model was compiled with
# binary_crossentropy loss and the 'accuracy' metric.
model.evaluate(db_test)



[0.420837938785553, 0.8633813858032227]

In [17]:
import os

# Make sure the output directory exists, then persist the trained weights.
weights_dir = 'models'
os.makedirs(weights_dir, exist_ok=True)
model.save_weights('models/imdb_weights.h5')