In [1]:
%env KERAS_BACKEND=tensorflow
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import imdb

env: KERAS_BACKEND=tensorflow


Using TensorFlow backend.


In [2]:
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words = 10000)

Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz


In [3]:
from keras.preprocessing import sequence
x_train1 = sequence.pad_sequences(x_train,maxlen=150)
x_test1 = sequence.pad_sequences(x_test,maxlen=150) 

# 課堂之模型

In [4]:
from keras.models import Sequential
from keras.layers import Dense , Embedding
from keras.layers import LSTM

In [5]:
N = 3 #文字壓縮到幾維
K = 4 #LSTM層幾個神經元
model = Sequential()
model.add(Embedding(10000,N))
model.add(LSTM(K))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train1,y_train,batch_size =32,epochs = 5,validation_data = (x_test1, y_test))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7166e5c128>

# 自己建模型

In [6]:
N = 6 #文字壓縮到幾維
K = 9 #LSTM層幾個神經元
model1 = Sequential()
model1.add(Embedding(10000,N))
model1.add(LSTM(K))
model1.add(Dense(1,activation='sigmoid'))
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [7]:
model1.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 6)           60000     
_________________________________________________________________
lstm_2 (LSTM)                (None, 9)                 576       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 10        
Total params: 60,586
Trainable params: 60,586
Non-trainable params: 0
_________________________________________________________________


In [8]:
model1.fit(x_train1,y_train,batch_size =64 ,epochs = 5,validation_data = (x_test1, y_test))

Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7147bcd2e8>

In [11]:
score = model1.evaluate(x_test1, y_test)



In [12]:
print(f'測試資料的 loss = {score[0]}')
print(f'測試資正確率 = {score[1]}')

測試資料的 loss = 0.3940972464466095
測試資正確率 = 0.852


# 的確有提升個2%左右，但仍有進步空間。並發現有些overfit而加上dropout。

In [14]:
from keras.layers import Dropout
N = 100 #文字壓縮到幾維
K = 25 #LSTM層幾個神經元
model2 = Sequential()
model2.add(Embedding(10000,N))
model.add(Dropout(0.25))
model2.add(LSTM(K))
model.add(Dropout(0.25))
model2.add(Dense(1,activation='sigmoid'))
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [17]:
model2.fit(x_train1, y_train, validation_split=0.01,batch_size=32, epochs=5)

Train on 24750 samples, validate on 250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7160328fd0>

In [18]:
score = model2.evaluate(x_test1, y_test)
print(f'測試資料的 loss = {score[0]}')
print(f'測試資正確率 = {score[1]}')

測試資料的 loss = 0.4827397626543045
測試資正確率 = 0.85704


# 發現 EarlyStopping 可用來追蹤驗證集上的正確率!! 來抓max看看

In [21]:
from keras.callbacks import EarlyStopping
from keras.layers import Dropout
early_stopping = EarlyStopping(monitor='val_acc',verbose=2,mode='max',patience=1)
N = 100 #文字壓縮到幾維
K = 25 #LSTM層幾個神經元
model3 = Sequential()
model3.add(Embedding(10000,N))
model3.add(Dropout(0.25))
model3.add(LSTM(K))
model3.add(Dropout(0.25))
model3.add(Dense(1,activation='sigmoid'))

In [23]:
model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model3.fit(x_train1, y_train, validation_split=0.01,batch_size=32, epochs=10,callbacks=[early_stopping])

Train on 24750 samples, validate on 250 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 00004: early stopping


<keras.callbacks.History at 0x71604590b8>

In [26]:
score = model3.evaluate(x_test1, y_test)
print(f'測試資料的 loss = {score[0]}')
print(f'測試資正確率 = {score[1]}')

測試資料的 loss = 0.4386029398012161
測試資正確率 = 0.856


# val_acc曾經達到 0.8800，但在early stopping當下為0.856