程式7-1

In [None]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

# Functional-API walkthrough: a 10-value input feeding two stacked
# 64-unit ReLU Dense layers.

# Symbolic input; each sample is a vector of 10 values.
inputs = Input(shape=(10,))

# First hidden layer: create it, then wire it to the input.
layer1 = Dense(units=64, activation='relu')
layer1_outputs = layer1(inputs)

# Second hidden layer, stacked on the first layer's output.
layer2 = Dense(units=64, activation='relu')
layer2_outputs = layer2(layer1_outputs)

# Wrap the graph in a Model and print its layer table.
model = Model(
    inputs=inputs,
    outputs=layer2_outputs,
)
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 10)]              0         
                                                                 
 dense_4 (Dense)             (None, 64)                704       
                                                                 
 dense_5 (Dense)             (None, 64)                4160      
                                                                 
Total params: 4864 (19.00 KB)
Trainable params: 4864 (19.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


程式 7-2

In [None]:
# Two-input functional model: a 10-value main input goes through a Dense
# layer, and a 5-value bypass input is concatenated onto that layer's
# output before the second Dense layer.

# Concatenate is imported here because this cell uses it, while the only
# other import of it in this notebook sits in a later cell; without this
# line a fresh "Restart & Run All" stops with a NameError.
from tensorflow.keras.layers import Concatenate

# Declare the two inputs
inputs = Input(shape=(10,))
bypass_inputs = Input(shape=(5,))

# Declare the layers
layer1 = Dense(64, activation='relu')
concat_layer = Concatenate()
layer2 = Dense(64, activation='relu')

# Connect the inputs and the layers
layer1_outputs = layer1(inputs)
# Joins the 64 outputs of layer1 with the 5 bypass values (69 per sample)
layer2_inputs = concat_layer([layer1_outputs, bypass_inputs])
layer2_outputs = layer2(layer2_inputs)

# Build the model; both inputs must be listed
model = Model(inputs=[inputs, bypass_inputs], outputs=layer2_outputs)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 10)]         0           []                               
                                                                                                  
 dense_2 (Dense)                (None, 64)           704         ['input_2[0][0]']                
                                                                                                  
 input_3 (InputLayer)           [(None, 5)]          0           []                               
                                                                                                  
 concatenate (Concatenate)      (None, 69)           0           ['dense_2[0][0]',                
                                                                  'input_3[0][0]']          

程式7-3

In [None]:
import numpy as np
import random
import os
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.text \
    import text_to_word_sequence
from tensorflow.keras.preprocessing.sequence \
    import pad_sequences
from tensorflow.keras.layers import Concatenate
import tensorflow as tf
import logging
tf.get_logger().setLevel(logging.ERROR)


程式7-4

In [None]:
# Hyperparameters and constants shared by the rest of the notebook

# Number of train-one-epoch / print-samples rounds in the training loop
EPOCHS = 20
# batch_size passed to training_model.fit
BATCH_SIZE = 128
# Size of the token-id space: input_dim of both Embedding layers and the
# width of the decoder's softmax output
MAX_WORDS = 10000
# Maximum number of lines read from the data file
READ_LINES = 60000
# Number of units in every LSTM layer
LAYER_SIZE = 256
# output_dim of the Embedding layers
EMBEDDING_WIDTH = 128
# Fraction (not a percentage) of rows held out as the test set
TEST_PERCENT = 0.2
# Number of test rows sampled for the printed translations
SAMPLE_SIZE = 20
# Token string handed to the Tokenizer as oov_token
OOV_WORD = 'UNK'
# Id printed as 'PAD' by tokens_to_words; the Embedding layers use
# mask_zero=True, so id 0 is treated as padding
PAD_INDEX = 0
# Id printed as OOV_WORD by tokens_to_words
# NOTE(review): presumably the id the Keras Tokenizer assigns to oov_token
# -- confirm against the Tokenizer documentation
OOV_INDEX = 1
# The two highest ids are reserved for the START and STOP markers
START_INDEX = MAX_WORDS - 2
STOP_INDEX = MAX_WORDS - 1
# Maximum words kept per sentence when reading the file, and maximum
# number of decoding steps when generating a translation
MAX_LENGTH = 60
# Tab-separated sentence-pair file
SRC_DEST_FILE_NAME = '/content/fra.txt'
# Alternative location, left disabled (path_head is not defined in the
# visible cells):
#SRC_DEST_FILE_NAME = path_head+'data/fra.txt'


程式 7-5

In [None]:
# 定義一個讀取檔案的函式
def read_file_combined(file_name, max_len):
    """Read tab-separated sentence pairs and split them into word lists.

    Every usable line must contain at least two tab-separated fields.
    Field 1 becomes the source sentence and field 0 the destination
    sentence; each is split with text_to_word_sequence and truncated to
    at most max_len words. Reading stops after READ_LINES lines.

    Args:
        file_name: Path of the UTF-8 text file to read.
        max_len: Maximum number of words kept per sentence.

    Returns:
        A tuple (src_word_sequences, dest_word_sequences) of two parallel
        lists, each holding one list of words per sentence.
    """
    src_word_sequences = []
    dest_word_sequences = []
    # The context manager closes the file even if a line fails to parse.
    with open(file_name, 'r', encoding='utf-8') as file:
        for i, line in enumerate(file):
            if i == READ_LINES:
                break
            pair = line.split('\t')
            if len(pair) < 2:
                # Blank or malformed line (no tab): skip it instead of
                # failing with IndexError on pair[1]. Both lists are
                # skipped together, so they stay aligned.
                continue
            src_word_sequences.append(
                text_to_word_sequence(pair[1])[0:max_len])
            dest_word_sequences.append(
                text_to_word_sequence(pair[0])[0:max_len])
    return src_word_sequences, dest_word_sequences


程式7-6

In [None]:
# 單字與編號序列互換函式

#單字轉編號
def tokenize(sequences):
    """Fit a new Tokenizer on `sequences` and encode them as id lists.

    The Tokenizer is created with num_words=MAX_WORDS-2 so that two ids
    stay free for the START and STOP markers, and with OOV_WORD as its
    out-of-vocabulary token.

    Args:
        sequences: The texts (here: lists of words) to fit on and encode.

    Returns:
        A tuple (tokenizer, token_sequences): the fitted Tokenizer and
        the encoded form of `sequences`.
    """
    vocabulary_cap = MAX_WORDS - 2
    word_indexer = Tokenizer(num_words=vocabulary_cap, oov_token=OOV_WORD)
    word_indexer.fit_on_texts(sequences)
    return word_indexer, word_indexer.texts_to_sequences(sequences)

#編號轉單字
def tokens_to_words(tokenizer, seq):
    """Convert a sequence of token ids to words, print it and return it.

    The special ids PAD_INDEX, OOV_INDEX, START_INDEX and STOP_INDEX are
    rendered as 'PAD', OOV_WORD, 'START' and 'STOP'. Every other id is
    looked up through tokenizer.sequences_to_texts.

    Args:
        tokenizer: Fitted tokenizer providing sequences_to_texts.
        seq: Iterable of integer token ids.

    Returns:
        The list of words. Earlier versions only printed the list and
        returned None; the printing is kept so existing calls behave the
        same.
    """
    word_seq = []
    for index in seq:
        if index == PAD_INDEX:
            word_seq.append('PAD')
        elif index == OOV_INDEX:
            word_seq.append(OOV_WORD)
        elif index == START_INDEX:
            word_seq.append('START')
        elif index == STOP_INDEX:
            word_seq.append('STOP')
        else:
            # sequences_to_texts works on a batch of sequences, so wrap
            # the single id and unwrap the single result.
            word_seq.append(tokenizer.sequences_to_texts(
                [[index]])[0])
    print(word_seq)
    return word_seq


程式7-7

In [None]:
# Load the sentence pairs as word lists, then fit one tokenizer per
# language and encode each side as token ids.
src_seq, dest_seq = read_file_combined(
    file_name=SRC_DEST_FILE_NAME,
    max_len=MAX_LENGTH,
)
src_tokenizer, src_token_seq = tokenize(sequences=src_seq)
dest_tokenizer, dest_token_seq = tokenize(sequences=dest_seq)


In [None]:
# Spot-check row 9999: source word list first, then destination word list
print(src_seq[9999])
print(dest_seq[9999])

['quel', 'fiasco']
['what', 'a', 'fiasco']


In [None]:
# Token ids of the destination sentence at row 9999
dest_token_seq[9999]

[35, 5, 3807]

程式7-8

In [None]:
# Build the padded input and target arrays (the test set is carved out of
# these arrays afterwards).

# Targets: every destination sentence followed by the STOP marker.
dest_target_token_seq = [
    tokens + [STOP_INDEX] for tokens in dest_token_seq
]
# Decoder inputs: the START marker in front of each target sentence.
dest_input_token_seq = [
    [START_INDEX] + tokens for tokens in dest_target_token_seq
]

# Source sentences use pad_sequences' default padding side; the two
# destination arrays are padded at the end ('post').
src_input_data = pad_sequences(src_token_seq)
dest_input_data = pad_sequences(dest_input_token_seq, padding='post')
# Targets are padded to the width of the decoder-input rows so both
# destination arrays have the same number of columns.
dest_target_data = pad_sequences(
    dest_target_token_seq,
    padding='post',
    maxlen=len(dest_input_data[0]),
)


In [None]:
# Show row 9999 of the padded source input, decoder input and decoder target
print(src_input_data[9999])
print(dest_input_data[9999])
print(dest_target_data[9999])


[   0    0    0    0    0    0    0    0    0    0    0    0  136 6226]
[9998   35    5 3807 9999    0    0    0    0]
[  35    5 3807 9999    0    0    0    0    0]


In [None]:
# Unpadded target ids for row 9999 (destination tokens plus STOP_INDEX)
dest_target_token_seq[9999]

[35, 5, 3807, 9999]

程式7-9

In [None]:
# Split the dataset into a training part and a test part.
# NOTE(review): no random seed is set in this cell, so the split and the
# sampled rows differ from run to run.
rows = len(src_input_data)
all_indices = list(range(rows))
test_rows = int(rows * TEST_PERCENT)
test_indices = random.sample(all_indices, test_rows)
# Membership is tested against a set so the filter is linear in the number
# of rows; testing against the list made it quadratic. The resulting
# train_indices (ascending order) is unchanged.
test_index_set = set(test_indices)
train_indices = [x for x in all_indices if x not in test_index_set]

train_src_input_data = src_input_data[train_indices]
train_dest_input_data = dest_input_data[train_indices]
train_dest_target_data = dest_target_data[train_indices]

test_src_input_data = src_input_data[test_indices]
test_dest_input_data = dest_input_data[test_indices]
test_dest_target_data = dest_target_data[test_indices]

# Draw SAMPLE_SIZE rows (inputs and reference targets) from the test set.
# From here on test_indices holds positions inside the test arrays
# (0 .. test_rows-1), no longer row numbers of the full dataset.
test_indices = list(range(test_rows))
sample_indices = random.sample(test_indices, SAMPLE_SIZE)
sample_input_data = test_src_input_data[sample_indices]
sample_target_data = test_dest_target_data[sample_indices]


程式7-10

In [None]:
# Build the encoder model.
# Its input is the source-sentence (French) token-id sequence; the sequence
# length is left unspecified (None).
enc_embedding_input = Input(shape=(None, ))

# Create the encoder layers: an Embedding that treats id 0 as padding
# (mask_zero=True) followed by two LSTM layers. Only the first LSTM returns
# the full output sequence, because the second LSTM consumes it.
enc_embedding_layer = Embedding(
    output_dim=EMBEDDING_WIDTH, input_dim
    = MAX_WORDS, mask_zero=True)
enc_layer1 = LSTM(LAYER_SIZE, return_state=True,
                  return_sequences=True)
enc_layer2 = LSTM(LAYER_SIZE, return_state=True)

# Connect the encoder layers.
# The output of the last layer is discarded; only the final internal
# states h and c of both LSTM layers are kept for the decoder.
enc_embedding_layer_outputs = \
    enc_embedding_layer(enc_embedding_input)
enc_layer1_outputs, enc_layer1_state_h, enc_layer1_state_c = \
    enc_layer1(enc_embedding_layer_outputs)
_, enc_layer2_state_h, enc_layer2_state_c = \
    enc_layer2(enc_layer1_outputs)

# Build the model: token ids in, the four state tensors out, in the order
# layer1 h, layer1 c, layer2 h, layer2 c.
enc_model = Model(enc_embedding_input,
                  [enc_layer1_state_h, enc_layer1_state_c,
                   enc_layer2_state_h, enc_layer2_state_c])
enc_model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 128)         1280000   
                                                                 
 lstm (LSTM)                 [(None, None, 256),       394240    
                              (None, 256),                       
                              (None, 256)]                       
                                                                 
 lstm_1 (LSTM)               [(None, 256),             525312    
                              (None, 256),                       
                              (None, 256)]                       
                                                                 
Total params: 2199552 (8.39 MB)
Trainable params: 2199552 (8.

程式7-11

In [None]:
# Build the decoder model.
# Its inputs are the translated-sentence token-id sequence plus the state
# tensors (the "thought vector") handed over from the encoder: one h and
# one c input of width LAYER_SIZE for each of the two LSTM layers.
dec_layer1_state_input_h = Input(shape=(LAYER_SIZE,))
dec_layer1_state_input_c = Input(shape=(LAYER_SIZE,))
dec_layer2_state_input_h = Input(shape=(LAYER_SIZE,))
dec_layer2_state_input_c = Input(shape=(LAYER_SIZE,))
dec_embedding_input = Input(shape=(None, ))

# Create the decoder layers: Embedding, two LSTM layers that both return
# their full output sequence and their states, and a softmax Dense layer
# with one output per token id (MAX_WORDS).
dec_embedding_layer = Embedding(output_dim=EMBEDDING_WIDTH,
                                input_dim=MAX_WORDS,
                                mask_zero=True)
dec_layer1 = LSTM(LAYER_SIZE, return_state = True,
                  return_sequences=True)
dec_layer2 = LSTM(LAYER_SIZE, return_state = True,
                  return_sequences=True)
dec_layer3 = Dense(MAX_WORDS, activation='softmax')

# Connect the decoder layers; each LSTM starts from the state inputs
# declared above instead of from its default initial state.
dec_embedding_layer_outputs = dec_embedding_layer(
    dec_embedding_input)
dec_layer1_outputs, dec_layer1_state_h, dec_layer1_state_c = \
    dec_layer1(dec_embedding_layer_outputs,
    initial_state=[dec_layer1_state_input_h,
                   dec_layer1_state_input_c])
dec_layer2_outputs, dec_layer2_state_h, dec_layer2_state_c = \
    dec_layer2(dec_layer1_outputs,
    initial_state=[dec_layer2_state_input_h,
                   dec_layer2_state_input_c])
dec_layer3_outputs = dec_layer3(dec_layer2_outputs)

# Build the model. Inputs: token ids followed by the four states.
# Outputs: the softmax predictions followed by the four updated states,
# in the same h/c order as the state inputs.
dec_model = Model([dec_embedding_input,
                   dec_layer1_state_input_h,
                   dec_layer1_state_input_c,
                   dec_layer2_state_input_h,
                   dec_layer2_state_input_c],
                  [dec_layer3_outputs, dec_layer1_state_h,
                   dec_layer1_state_c, dec_layer2_state_h,
                   dec_layer2_state_c])
dec_model.summary()


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_6 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 embedding_1 (Embedding)     (None, None, 128)            1280000   ['input_6[0][0]']             
                                                                                                  
 input_2 (InputLayer)        [(None, 256)]                0         []                            
                                                                                                  
 input_3 (InputLayer)        [(None, 256)]                0         []                            
                                                                                            

程式7-12

In [None]:
# Build and compile the end-to-end training model by chaining the encoder
# and decoder models.

train_enc_embedding_input = Input(shape=(None, ))
train_dec_embedding_input = Input(shape=(None, ))
# The encoder returns its four LSTM state tensors as a list ...
intermediate_state = enc_model(train_enc_embedding_input)
# ... which is appended to the decoder's token input. Only the decoder's
# softmax output is needed for training; its returned states are dropped.
train_dec_output, _, _, _, _ = dec_model(
    [train_dec_embedding_input] +
    intermediate_state)
training_model = Model([train_enc_embedding_input,
                        train_dec_embedding_input],
                        train_dec_output)
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
optimizer = RMSprop(learning_rate=0.01)
# Sparse loss: the targets are integer token ids, not one-hot vectors.
training_model.compile(loss='sparse_categorical_crossentropy',
                       optimizer=optimizer, metrics =['accuracy'])
training_model.summary()




Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_7 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 input_8 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 model (Functional)          [(None, 256),                2199552   ['input_7[0][0]']             
                              (None, 256),                                                        
                              (None, 256),                                                        
                              (None, 256)]                                                  

程式 7-13

In [None]:
# Alternate between training for one epoch and translating the sampled
# test sentences, so translation quality can be watched as training
# progresses.

for i in range(EPOCHS):
    print('Epoch: ' , i)

    # Train the model for a single epoch, validating on the test split.
    history = training_model.fit(
        [train_src_input_data, train_dest_input_data],
        train_dest_target_data,
        validation_data=(
            [test_src_input_data, test_dest_input_data],
            test_dest_target_data),
        batch_size=BATCH_SIZE,
        epochs=1)

    # Translate each of the pre-selected test samples.
    for (test_input, test_target) in zip(sample_input_data,
                                         sample_target_data):
        # Feed one source sentence (as a batch of one) to the encoder.
        x = np.reshape(test_input, (1, -1))
        last_states = enc_model.predict(
            x, verbose=0)

        # Start decoding from START_INDEX together with the encoder's
        # final states (the thought vector).
        prev_word_index = START_INDEX
        pred_seq = []
        for j in range(MAX_LENGTH):
            x = np.reshape(np.array(prev_word_index), (1, 1))

            # Produce one word and keep the updated states for the next
            # step. The states are collected into last_states directly,
            # so the dec_layer*_state_* tensors used to build the decoder
            # graph are not overwritten with numpy arrays.
            preds, *last_states = dec_model.predict(
                [x, *last_states], verbose=0)

            # Greedy choice: take the most probable word.
            prev_word_index = np.asarray(preds[0][0]).argmax()
            pred_seq.append(prev_word_index)
            if prev_word_index == STOP_INDEX:
                break
        # Print the source sentence, the reference and the prediction.
        tokens_to_words(src_tokenizer, test_input)
        tokens_to_words(dest_tokenizer, test_target)
        tokens_to_words(dest_tokenizer, pred_seq)
        print('\n\n')


step:  0
['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'pas', 'un', 'son', 'ne', 'pouvait', 'être', 'entendu']
['not', 'a', 'sound', 'was', 'heard', 'STOP', 'PAD', 'PAD', 'PAD']
['no', 'only', 'hurt', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'nous', 'voici\u202f']
['here', 'we', 'are', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']
["we'll", 'get', 'up', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'vous', 'devez', 'être', 'prudentes']
['you', 'must', 'be', 'careful', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
['you', 'must', 'be', 'right', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'nous', 'mangerons', 'ton', 'pain']
["we'll", 'eat', 'your', 'bread', 'STOP', 'PAD', 'PAD', 'PAD', 'PAD']
["we'll", 'get', 'your', 'key', 'STOP']



['PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'tu', 'es', 'objectif']
["you're", 'objective', 'STOP', 'PA