程式5-8

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
import tensorflow as tf
import logging
# Raise TensorFlow's logger threshold to ERROR so lower-severity messages are hidden.
tf.get_logger().setLevel(logging.ERROR)


EPOCHS = 2  # number of training epochs passed to model.fit
BATCH_SIZE = 256  # mini-batch size passed to model.fit
INPUT_FILE_NAME = '/content/frankenstein.txt'  # text file read as the training corpus
WINDOW_LENGTH = 40  # number of characters in each input fragment
WINDOW_STEP = 3  # stride between the start positions of consecutive fragments
BEAM_SIZE = 8  # candidates expanded per beam, and beams kept after each step
NUM_LETTERS = 11  # number of characters generated after the seed text
MAX_LENGTH = 50  # NOTE(review): not referenced by any cell visible here -- confirm whether still needed


程式5-9

In [None]:
# Read the whole corpus. The context manager closes the file even if
# read() raises; 'utf-8-sig' drops a leading byte-order mark if present.
with open(INPUT_FILE_NAME, 'r', encoding='utf-8-sig') as file:
    text = file.read()

# Lower-case the text and turn every newline into a space.
text = text.lower()
text = text.replace('\n', ' ')
# Collapse runs of spaces down to a single space. One replace('  ', ' ')
# pass only halves each run (three spaces become two), so repeat until
# no double space is left.
while '  ' in text:
    text = text.replace('  ', ' ')

# Number every distinct character and build lookup tables in both
# directions. The characters are sorted because set iteration order for
# strings varies between interpreter runs; sorting keeps the
# character <-> index mapping identical after a kernel restart.
unique_chars = sorted(set(text))
char_to_index = {ch: index for index, ch in enumerate(unique_chars)}
index_to_char = dict(enumerate(unique_chars))
encoding_width = len(char_to_index)


程式5-10

In [None]:
# Build the training samples: slide a WINDOW_LENGTH-character window over
# the text in steps of WINDOW_STEP; the character that follows each window
# is the prediction target for that window.
window_starts = range(0, len(text) - WINDOW_LENGTH, WINDOW_STEP)
fragments = [text[start: start + WINDOW_LENGTH] for start in window_starts]
targets = [text[start + WINDOW_LENGTH] for start in window_starts]

# One-hot encode the samples.
# X: (sample, position in window, character index); y: (sample, character index)
X = np.zeros((len(fragments), WINDOW_LENGTH, encoding_width))
y = np.zeros((len(fragments), encoding_width))
positions = np.arange(WINDOW_LENGTH)
for row, (fragment, target_char) in enumerate(zip(fragments, targets)):
    # Every fragment is exactly WINDOW_LENGTH characters long, so one
    # fancy-indexed assignment sets the hot entry of each position.
    char_indices = [char_to_index[ch] for ch in fragment]
    X[row, positions, char_indices] = 1
    y[row, char_to_index[target_char]] = 1


In [None]:
# Display the one-hot matrix of the first training fragment (one row per character position).
X[0]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
# Print the first target character, followed by its one-hot encoded vector.
print(targets[0])
print(y[0])

e
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]


程式5-11

In [None]:
# Build and train the model: two stacked 128-unit LSTM layers followed by
# a softmax layer that yields one probability per character.
model = Sequential([
    # return_sequences=True makes this layer emit its output at every
    # time step, which the second LSTM consumes as its input sequence.
    LSTM(128, return_sequences=True,
         dropout=0.2, recurrent_dropout=0.2,
         input_shape=(None, encoding_width)),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(encoding_width, activation='softmax'),
])
model.compile(optimizer='adam',
              loss='categorical_crossentropy')
model.summary()
# Hold out 5% of the samples for validation while training.
history = model.fit(
    X, y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=0.05,
    shuffle=True,
    verbose=2,
)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, None, 128)         98304     
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 63)                8127      
                                                                 
Total params: 238015 (929.75 KB)
Trainable params: 238015 (929.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/2
565/565 - 417s - loss: 2.7727 - val_loss: 2.6257 - 417s/epoch - 737ms/step
Epoch 2/2
565/565 - 417s - loss: 2.3552 - val_loss: 2.4787 - 417s/epoch - 738ms/step


程式5-12

In [None]:
# Create the initial beam. Every beam is a 3-tuple:
# (cumulative log-probability, text so far, list of one-hot vectors for that text)
letters = 'the body'
one_hots = []
for char in letters:
    x = np.zeros(encoding_width)
    x[char_to_index[char]] = 1
    one_hots.append(x)
beams = [(np.log(1.0), letters, one_hots)]

# Predict the next NUM_LETTERS characters (11 in this example).
for _ in range(NUM_LETTERS):
    # All beams have the same length, so their one-hot sequences stack into
    # one rectangular batch that the model scores in a single call.
    minibatch = np.array([triple[2] for triple in beams])
    y_predict = model.predict(minibatch, verbose=0)
    new_beams = []
    for triple, softmax_vec in zip(beams, y_predict):
        # Extend each beam with its BEAM_SIZE (8 here) most probable next
        # characters. A stable sort on the negated probabilities gives
        # descending order with ties resolved toward the lower index, the
        # same order repeated argmax calls would produce. The prediction
        # array is left unmodified, and the slice never yields more
        # candidates than there are characters.
        top_indices = np.argsort(-softmax_vec, kind='stable')[:BEAM_SIZE]
        for char_index in top_indices:
            prob = softmax_vec[char_index]
            if prob <= 0:
                # Candidates are in descending order, so everything from
                # here on has zero probability; log(0) would only add -inf
                # scores and a RuntimeWarning.
                break
            new_prob = triple[0] + np.log(prob)
            new_letters = triple[1] + index_to_char[char_index]
            x = np.zeros(encoding_width)
            x[char_index] = 1
            # Build a new list so sibling candidates do not share state.
            new_one_hots = triple[2] + [x]
            new_beams.append((new_prob, new_letters,
                              new_one_hots))
    # Keep only the BEAM_SIZE most probable candidates; drop the rest.
    new_beams.sort(key=lambda tup: tup[0], reverse=True)
    beams = new_beams[0:BEAM_SIZE]

for item in beams:
    print(item[1])


the body and and th
the body and the th
the body and and an
the body the the th
the body and and in
the body and the to
the body and and to
the body and the so


程式5-13(註：此為程式片段，無法執行)

In [None]:
from tensorflow.keras.layers import Bidirectional
# Fragment only: the surrounding model-building code is omitted below.
…(略)
# Add a 16-unit LSTM using ReLU activation, wrapped in a Bidirectional layer.
model.add(Bidirectional(LSTM(16, activation='relu')))