In [45]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [46]:
from tensorflow import keras
from tensorflow.keras import Input, layers, models

In [47]:
#函數式API(functional API)
input_tensor = Input(shape=(32, )) #建立一個輸入張量shape=(?, 32)，因為不包含張量有幾筆(batch)，若一開始就要定義資料有幾筆可使用batch_shape
print(input_tensor.shape)

dense = layers.Dense(16, activation='relu') #建立一個dense層
output_tensor = dense(input_tensor) #將張量傳入層函數，回傳輸出張量
print(output_tensor.shape) #Dense層的輸出張量shape=(?, 16)

(None, 32)
(None, 16)


In [48]:
#序列式(Sequential) vs. 函數式(Functional) API

#Sequential
seq_model = models.Sequential()
seq_model.add(layers.Dense(32, activation='relu', input_shape=(64,)))
seq_model.add(layers.Dense(32, activation='relu'))
seq_model.add(layers.Dense(10, activation='softmax'))

#Functional
input_tensor = Input(shape=(64,)) #建立一個初始向量
x = layers.Dense(32, activation='relu')(input_tensor)
y = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(y)
#Model類別用初始的輸入張量和最後的輸出張量來取得模型物件
model = models.Model(input_tensor, output_tensor)

In [49]:
model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 64)]              0         
_________________________________________________________________
dense_22 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_23 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_24 (Dense)             (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


In [50]:
#編譯模型
model.compile(optimizer='rmsprop',
             loss='categorical_crossentropy')

In [51]:
#產生Numpy訓練資料
x_train = np.random.random((1000, 64)) #產生輸入資料(問題)
y_train = np.random.random((1000, 10)) #產生標籤資料(正確答案)

In [52]:
#訓練模型
model.fit(x_train, y_train,
         epochs=10,
         batch_size=128)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7faa54be9750>

In [53]:
score = model.evaluate(x_train, y_train) #因為無metrics，故只回傳loss value



In [54]:
print(score)

25.0384464263916


In [55]:
#多輸入模型
#以Functional API實作雙輸入問答模型:
#典型的問答模型有兩個輸入:問題與參考文字資料，
#各自透過Embedding層和LSTM層編碼成表示向量(嵌入向量)後，在串接層串接這些向量，傳給Dense層(softmax分類器)進行分類，輸出答案

In [56]:
text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

#處理"參考文字"輸入
text_input = Input(shape=(None, ), dtype='int32', name='text') #shape=(None, )代表不限定張量的shape大小，文字輸入可以是可變長度的整數序列
                                                #可以選擇是否為輸入命名，建立dict:{'text':Numpy資料}
embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input) #將輸入送入嵌入層，編碼成大小64的文字嵌入向量
print(embedded_text.shape) #shape=(?,?,64)
encoded_text = layers.LSTM(32)(embedded_text) #透過LTSM層將向量序列編碼成單一個向量
print(encoded_text.shape) #shape=(?,32)

#處理"問題"輸入
question_input = Input(shape=(None, ), dtype='int32', name='question') #shape=(None, )代表不限定張量的shape大小，文字輸入可以是可變長度的整數序列
                                                #可以選擇是否為輸入命名，建立dict:{'question':Numpy資料}
embedded_question = layers.Embedding(question_vocabulary_size, 32)(question_input) #將輸入送入嵌入層，編碼成大小32的文字嵌入向量
print(embedded_question.shape) #shape=(?,?,32)
encoded_question = layers.LSTM(16)(embedded_question) #透過LTSM層將向量序列編碼成單一個向量
print(encoded_question.shape) #shape=(?,16)

(None, None, 64)
(None, 32)
(None, None, 32)
(None, 16)


In [57]:
#串接編碼後的"問題"和"參考文字"資料(向量)，將兩份資料合而為一
concatenated = layers.concatenate([encoded_question, encoded_text], axis=-1) #axis參數為-1代表以輸入的最後一軸進行串接
print(concatenated.shape) #shape=(?,48)

answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated) #最後增加Dense層(softmax分類器)，將串接向量送入，輸出模型的結果向量(answer)
print(answer.shape) #shape=(?,500) 500種可能的答案

(None, 48)
(None, 500)


In [58]:
model = models.Model([text_input, question_input], answer)
#                    [   輸入1   ,     輸入2     ],  輸出

In [59]:
model.compile(optimizer='rmsprop',
             loss='categorical_crossentropy',
             metrics=['acc'])

In [60]:
model.summary()

Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
question (InputLayer)           [(None, None)]       0                                            
__________________________________________________________________________________________________
text (InputLayer)               [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_8 (Embedding)         (None, None, 32)     320000      question[0][0]                   
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, None, 64)     640000      text[0][0]                       
____________________________________________________________________________________________

In [61]:
#準備模型Numpy陣列資料，選擇以下兩種方式進行訓練(fitting):
#1.將Numpy陣列組成list作為輸入，進行訓練
#2.建立一個字典，將輸入透過鍵(輸入名稱)，對應到值(Numpy陣列資料)，此方法只有在為輸入命名時才可用

In [62]:
num_samples = 1000
max_length = 100

#產生虛擬text資料:1000筆，每筆100個字(數字)
text = np.random.randint(1, text_vocabulary_size, size=(num_samples, max_length))
print(text.shape) #shape=(1000,100) 1000筆資料，每筆100個字

#產生虛擬question資料:1000筆，每筆100個字(數字)
question = np.random.randint(1, question_vocabulary_size, size=(num_samples, max_length))
print(question.shape) #shape=(1000,100) 1000筆資料，每筆100個字

#產生虛擬answer資料，需為one-hot編碼:1000個正確答案
answers = np.zeros(shape=(num_samples, answer_vocabulary_size), dtype='int32')
for answer in answers:
    answer[np.random.randint(answer_vocabulary_size)] = 1 #其中一位設為1
print(answer.shape) #shape=(1000, 500) 共1000筆資料，每筆可能有500種答案

(1000, 100)
(1000, 100)
(500,)


In [63]:
#方法1:使用lsit送入資料進行訓練
model.fit([text, question], answers,
         epochs=10,
         batch_size=128)

#方法2:使用dict送入資料進行訓練，鍵:input層的名稱，值:Numpy資料
model.fit({'text': text, 'question': question}, answers, 
         epochs=10,
         batch_size=128)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7faa440d5c90>

In [64]:
#多輸出模型
#以一個神經網路預測資料中的不同屬性
#以Functional API實作三個輸出結果的模型
vocabulary_size = 50000 #文章大小
num_income_groups = 10 #將收入分成10群

posts_input = Input(shape=(None,), dtype='int32', name='posts') #不限定輸入向量的shape大小

#用Functional API將輸入向量傳入Embedding層，得到維度256的嵌入向量
embedding_posts = layers.Embedding(vocabulary_size, 256)(posts_input)
print(embedding_posts.shape) #shape=(?,?,256)

#以Functional API將嵌入向量傳入一層層中進行處理
x = layers.Conv1D(128, 5, activation='relu')(embedding_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)
print(x.shape) #shape=(?, 128)

(None, None, 256)
(None, 128)


In [65]:
#將x向量分別送入3個輸出層
#注意: 需為輸出層指定名稱

#預測年紀的輸出層: 純量回歸任務
age_prediction = layers.Dense(1, name='age')(x)

#預測收入族群的輸出層多分類任務(10類)
income_prediction = layers.Dense(num_income_groups, activation='softmax', name='income')(x)

#預測性別的輸出層: 二元分類任務
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

In [66]:
#用輸入向量與輸出向量實例化Model物件
model = models.Model(posts_input, [age_prediction, income_prediction, gender_prediction])
#                        輸入   ,       輸出1     ,        輸出2      ,       輸出3

In [67]:
#訓練這樣的模型，需為神經網路的不同輸出指定不同的損失函數來計算損失值，但由於梯度下降要求純量最小化，必須將這些損失值結合成單一數值才能訓練模型
#在編譯時使用損失串列(loss list)或損失字典(loss dict)，來為不同的輸出指定不同的損失函數，最後產生的損失值總和，稱為"全域損失(global loss)"
#目標是在訓練過程中最小化全域損失

#編譯方式1: 使用損失串列(loss list)
model.compile(optimizer='rmsprop',
             loss=['mse', 'categorical_crossentropy', 'binary_crossentropy']) #需按照層的建立順序

#編譯方式2: 使用損失字典(loss dict)
model.compile(optimizer='rmsprop',
             loss={'age': 'mse', 'income': 'categorical_crossentropy', 'gender': 'binary_crossentropy'}) #需為輸出層指定名稱

In [68]:
#若有非常不平衡的損失會導致模型優先針對最大損失的任務進行優化，而犧牲其他任務
#因此可加入loss_weigths參數，為損失值分配不同程度的重要性，尤其損失值使用不同單位時特別有用
#ex: 均方誤差(MSE)損失通常取數值3~5，交叉商(cross-entropy)損失可低至0.1

#編譯方式1: 使用損失串列(loss list)
model.compile(optimizer='rmsprop',
             loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'], #需按照層的建立順序
             loss_weights=[0.25, 1., 10.]) #以層的順序指定損失權重

#編譯方式2: 使用損失字典(loss dict)
model.compile(optimizer='rmsprop',
             loss={'age': 'mse', 'income': 'categorical_crossentropy', 'gender': 'binary_crossentropy'}, #需為輸出層指定名稱
             loss_weights={'age': 0.25, 'income': 1., 'gender': 10.}) #以層的名稱來指定損失權重

In [None]:
#可透過串列(lsit)或字典(dict)將Numpy資料傳入模型進行訓練

#方法1:使用lsit送入資料進行訓練
model.fit(posts, [ages_targets, income_targets, gender_targets],
         epochs=10,
         batch_size=64)

#方法2:使用dict送入資料進行訓練，鍵:input層的名稱，值:Numpy資料
model.fit(posts, {'age': age_targets, 'income': income_targets, 'gender': gender_targets}, 
         epochs=10,
         batch_size=64)