In [2]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb

max_features = 8000
maxlen = 88
batch_size = 32

print('載入資料...')
(x_train, y_train),(x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train),'train sequences')
print(len(x_test),'test sequences')

載入資料...
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
25000 train sequences
25000 test sequences


In [3]:
#將影評文字(Token)轉換成數字串列
print(x_train[0])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]


In [7]:
print('Pad sequences(samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Pad sequences(samples x time)
x_train shape: (25000, 88)
x_test shape: (25000, 88)


In [8]:
print(x_train[0])

[  16   82    2    8    4  107  117 5952   15  256    4    2    7 3766
    5  723   36   71   43  530  476   26  400  317   46    7    4    2
 1029   13  104   88    4  381   15  297   98   32 2071   56   26  141
    6  194 7486   18    4  226   22   21  134  476   26  480    5  144
   30 5535   18   51   36   28  224   92   25  104    4  226   65   16
   38 1334   88   12   16  283    5   16 4472  113  103   32   15   16
 5345   19  178   32]


In [13]:
print('建立模型...')
model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.3, recurrent_dropout=0.3))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

建立模型...


In [14]:
print('訓練和建立模型....')
model.fit(x_train, y_train,
         batch_size = batch_size,
         epochs=2,
         validation_data=(x_test,y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)

訓練和建立模型....
Train on 25000 samples, validate on 25000 samples
Epoch 1/2
Epoch 2/2


In [15]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 128)         1024000   
_________________________________________________________________
lstm_3 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 129       
Total params: 1,155,713
Trainable params: 1,155,713
Non-trainable params: 0
_________________________________________________________________
None


In [17]:
predict=model.predict_classes(x_test)
predict[0:8]

array([[0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0]])

In [18]:
predict_classes=predict.reshape(-1)
predict_classes[0:8]

array([0, 1, 1, 1, 1, 1, 1, 0])

In [27]:
PositiveDict={1:'滿意',0:'不滿意'}
def display_info(i):
    print(x_test[i])
    print('影評真實值:',PositiveDict[y_test[i]],'預測值:',PositiveDict[predict_classes[i]])

In [28]:
display_info(1)

[  56    4  841    5  990  692    8    4 1669  398  229   10   10   13
 2822  670 5304   14    9   31    7   27  111  108   15 2033   19 7836
 1429  875  551   14   22    9 1193   21   45 4829    5   45  252    8
    2    6  565  921 3639   39    4  529   48   25  181    8   67   35
 1732   22   49  238   60  135 1162   14    9  290    4   58   10   10
  472   45   55  878    8  169   11  374 5687   25  203   28    8  818
   12  125    4 3077]
影評真實值: 滿意 預測值: 滿意
