In [14]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

In [None]:
# Vocabulary cap: only the 10,000 most frequent words are kept.
num_words = 10_000
# Every review is brought to this many tokens before entering the model.
maxlen = 200

In [16]:
# Load the IMDB train/test splits, restricting the vocabulary to the
# `num_words` most frequent words.
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=num_words,
)

In [17]:
# Report feature/label shapes for each split before padding.
# The printed labels mean "training set" and "test set" respectively.
print("训练集:", *(split.shape for split in (x_train, y_train)))
print("测试集:", *(split.shape for split in (x_test, y_test)))

训练集: (25000,) (25000,)
测试集: (25000,) (25000,)


In [None]:
# Bring every review in both splits to a common length of `maxlen` tokens
# so they can be batched as fixed-width 2-D arrays.
x_train, x_test = (
    pad_sequences(reviews, maxlen=maxlen) for reviews in (x_train, x_test)
)

In [19]:
# Re-check the shapes after padding: features should now be 2-D with
# `maxlen` columns. The printed labels mean "training set" / "test set".
print("训练集:", *(split.shape for split in (x_train, y_train)))
print("测试集:", *(split.shape for split in (x_test, y_test)))

训练集: (25000, 200) (25000,)
测试集: (25000, 200) (25000,)


In [20]:
# Peek at the label of the first training example (recorded output: 1).
# NOTE(review): presumably 1 = positive sentiment — confirm against the dataset docs.
y_train[0]

1

In [21]:
# Size of each learned word vector; passed as `output_dim` to the Embedding layer.
embedding_dim = 128

In [22]:
# Binary classifier: token ids -> embeddings -> LSTM -> sigmoid score.
model = Sequential()
# Map each of the `num_words` token ids to an `embedding_dim`-dimensional vector.
model.add(Embedding(input_dim=num_words, output_dim=embedding_dim, input_length=maxlen))
# Single recurrent layer with dropout on both its inputs and its recurrent state.
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
# One sigmoid unit producing a score in [0, 1] for the binary label.
model.add(Dense(1, activation='sigmoid'))



In [23]:
# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output), and accuracy tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          1280000   
                                                                 
 lstm (LSTM)                 (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 1,411,713
Trainable params: 1,411,713
Non-trainable params: 0
_________________________________________________________________


In [27]:
# Train for 5 epochs in mini-batches of 64, with 20% of the training data
# held out for validation. The returned History object is kept in `history`
# so the per-epoch metrics can be inspected afterwards.
#
# The former `workers=8` argument was dropped: it only applies to generator /
# keras.utils.Sequence inputs, so it had no effect on the in-memory arrays
# passed here, and newer Keras releases no longer accept it in `fit()`.
history = model.fit(
    x_train, y_train,
    batch_size=64,
    epochs=5,
    validation_split=0.2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [28]:
# Score the trained model on the held-out test split. The model was compiled
# with a single 'accuracy' metric, so evaluate() yields the loss followed by
# the accuracy.
score, acc = model.evaluate(x=x_test, y=y_test, batch_size=64)

# The printed labels mean "test set Loss" / "test set Accuracy".
print("测试集 Loss:", score)
print("测试集 Accuracy:", acc)

测试集 Loss: 0.4894622564315796
测试集 Accuracy: 0.8539599776268005
