In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers import LSTM, GRU
from tensorflow.keras.datasets import imdb

In [2]:
# Load the IMDB review dataset, keeping only the 20,000 most frequent words
# in the vocabulary.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=20000)

In [3]:
# Bring every review to a fixed length of 200 tokens so each split can be
# fed to the network as one rectangular array.
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=200) for reviews in (x_train, x_test)
)

In [4]:
# Sentiment classifier: word embeddings feeding a single GRU layer.
model = Sequential([
    # Map each of the 20,000 word indices to a 150-dimensional vector.
    Embedding(20000, 150),
    # Recurrent layer with 128 units; dropout of 0.1 is applied to both the
    # layer inputs and the recurrent state.
    GRU(128, dropout=0.1, recurrent_dropout=0.1),
    # Single sigmoid unit producing the binary prediction.
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [5]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 150)         3000000   
_________________________________________________________________
gru (GRU)                    (None, 128)               107520    
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 3,107,649
Trainable params: 3,107,649
Non-trainable params: 0
_________________________________________________________________


In [6]:
# The Embedding layer is a linear lookup with no bias term, so its parameter
# count is simply 20000 * 150 = 3,000,000.

In [7]:
# Train for 5 epochs in mini-batches of 64 samples.
# NOTE(review): the test split is passed as validation data here and is also
# the split scored by model.evaluate further down — confirm that reuse is intended.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=5,
    validation_data=(x_test, y_test),
)

Train on 25000 samples, validate on 25000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1a8287513c8>

In [10]:
# Run the trained model over the test reviews and keep its outputs.
# NOTE(review): `result` is not read by any later cell visible here.
result = model.predict(x=x_test)

In [11]:
# Score the model on the test split; with the compiled configuration this
# yields the loss followed by the accuracy metric.
loss, acc = model.evaluate(x=x_test, y=y_test)



In [12]:
# Report the test loss and accuracy with two decimals.
# The format specs have no space after the colon: a space in that position is
# the "sign" flag, which prefixes positive numbers with an extra blank and
# produced doubled spaces in the printed report.
print(f"loss: {loss:.2f}\naccuracy rate: {acc * 100:.2f}%")

loss:  0.48
accuracy rate:  85.72%


In [14]:
# Persist the trained model to disk; the .h5 suffix selects Keras' HDF5 format.
model.save(filepath='myRNNmodelforIMDB.h5')