<a href="https://colab.research.google.com/github/Syilun/TibameAI04/blob/master/RNN_family.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Dummy input batch shared by the layer demos in this notebook.
# How the input/output shapes are decided: the tensor is laid out as
# (batch, sequence length, feature dimension), and the batch size is simply
# the number of samples.
BATCH_SIZE = 1    # samples in the batch
SEQ_LENGTH = 5    # timesteps per sequence
FEATURE_DIM = 8   # length of the feature vector at each timestep
SEED = 42         # arbitrary fixed value

# Fix TensorFlow's global seed so a fresh "Restart & Run All" draws the same
# random input tensor every time.
tf.random.set_seed(SEED)

inputs = tf.random.normal([BATCH_SIZE, SEQ_LENGTH, FEATURE_DIM])

# tf.keras.layers.LSTM

In [None]:
# `units` plays the same role as the neuron count of a Dense layer: it decides
# how long the output vector is.
# The input holds five timesteps, each a length-8 vector, fed one after another.
# output.shape == (1, 3): 1 is the batch size, 3 is the output feature
# dimension, i.e. the result is a single vector of length 3.
lstm_layer = layers.LSTM(units=3)
output = lstm_layer(inputs)
print(output.shape)

(1, 3)


In [None]:
# return_sequences (default False): every timestep produces an output; when
#   set to True the layer returns all of them instead of only the last one.
# return_state (default False): when set to True the layer additionally
#   returns two state tensors.
# final_hiden_state is the same (1, 3) vector as the plain LSTM output.
# final_cell_state is C_t, handed back as well; its shape is also (1, 3).
lstm_with_state = layers.LSTM(units=3, return_sequences=True, return_state=True)
whole_seq_output, final_hiden_state, final_cell_state = lstm_with_state(inputs)

for label, tensor in (
    ('whole_seq_output: ', whole_seq_output),
    ('final_hiden_state (h): ', final_hiden_state),
    ('final_cell_state (c): ', final_cell_state),
):
    print(label, tensor.shape, tensor)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[ 0.01134522  0.0120947   0.24948213]
  [ 0.11279607 -0.05773085 -0.04871291]
  [ 0.0489895   0.16063593  0.01779252]
  [ 0.36228287  0.21084888  0.38239726]
  [ 0.04803879  0.30942777  0.22481783]]], shape=(1, 5, 3), dtype=float32)
final_hiden_state (h):  (1, 3) tf.Tensor([[0.04803879 0.30942777 0.22481783]], shape=(1, 3), dtype=float32)
final_cell_state (c):  (1, 3) tf.Tensor([[0.17377621 0.38481298 0.31572753]], shape=(1, 3), dtype=float32)


# tf.keras.layers.GRU

In [None]:
# GRU with 3 units applied to the same input batch; as with the LSTM demo,
# the default call yields one length-3 vector per sample.
gru_layer = layers.GRU(units=3)
output = gru_layer(inputs)
print(output.shape)



(1, 3)


In [None]:
# GRU takes the same return_sequences / return_state switches as LSTM.
# Key difference: a GRU has no final cell state (only LSTM has one), so
# return_state=True adds just a single state tensor.
gru_with_state = layers.GRU(units=3, return_sequences=True, return_state=True)
whole_sequence_output, final_state = gru_with_state(inputs)

for label, tensor in (
    ('whole_seq_output: ', whole_sequence_output),
    ('final_state (h): ', final_state),
):
    print(label, tensor.shape, tensor)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[ 0.1339913  -0.24811737  0.24841756]
  [-0.01752811  0.36379862 -0.1139631 ]
  [-0.2599124  -0.18762621 -0.8300888 ]
  [-0.24967541 -0.44094458 -0.5778853 ]
  [ 0.21719603 -0.5088333  -0.32861453]]], shape=(1, 5, 3), dtype=float32)
final_state (h):  (1, 3) tf.Tensor([[ 0.21719603 -0.5088333  -0.32861453]], shape=(1, 3), dtype=float32)


# tf.keras.layers.Bidirectional

In [None]:
# Bidirectional wrapper: the forward and the backward pass are each an LSTM
# that produces a length-10 vector.
# The shape printed here is what comes back with every argument left at its
# default.
#
# merge_mode (default 'concat') selects how the two directions are combined:
# either concatenation or element-wise addition.
#   'concat' -> output is a length-20 vector
#   'sum'    -> output is a length-10 vector
bidirectional_lstm = layers.Bidirectional(layers.LSTM(units=10))
output = bidirectional_lstm(inputs)
print(output.shape)


(1, 20)


NameError: ignored

In [None]:
# merge_mode='sum': combine the forward and backward outputs by addition
# instead of the default concatenation.
summed_bilstm = layers.Bidirectional(layers.LSTM(units=10), merge_mode='sum')
output = summed_bilstm(inputs)
print(output.shape)

In [None]:
# return_sequences=True on the wrapped LSTM, merge_mode left at its default.
sequence_bilstm = layers.Bidirectional(layers.LSTM(units=10, return_sequences=True))
output = sequence_bilstm(inputs)
print(output.shape)

In [None]:
# return_sequences=True together with return_state=True on the wrapped LSTM.
# The forward and the backward LSTM each have their own h and c, so with
# return_state=True four state tensors are returned after the merged output.
bilstm_with_state = layers.Bidirectional(
    layers.LSTM(units=2, return_sequences=True, return_state=True)
)
output, forward_h, forward_c, backward_h, backward_c = bilstm_with_state(inputs)

for label, tensor in (
    ('output : ', output),
    ('forward_h : ', forward_h),
    ('forward_c : ', forward_c),
    ('backward_h : ', backward_h),
    ('backward_c : ', backward_c),
):
    print(label, tensor.shape, tensor)


## Many-to-one

In [None]:
# Many-to-one model: an LSTM reads the sequence and a Dense softmax layer
# turns its output into 10 values.
# Input shape = (how many words / days of data to look at, length of each
# step's data vector).
# The LSTM output vector length (units) is set to 128.
#
# The tensors are named `model_input` / `model_output` so the Python builtin
# `input()` is not shadowed in the notebook's global namespace.
model_input = layers.Input(shape=(5, 8))
x = layers.LSTM(128)(model_input)
model_output = layers.Dense(10, activation='softmax')(x)
model = models.Model(inputs=model_input, outputs=model_output)
model.summary()

In [None]:
# Multi-layer: stack several LSTM layers.
# Whenever an LSTM feeds into another LSTM it must set return_sequences=True,
# so that the next layer has an input to connect to at every timestep.
#
# The tensors are named `model_input` / `model_output` so the Python builtin
# `input()` is not shadowed in the notebook's global namespace.
model_input = layers.Input(shape=(5, 8))
x = layers.LSTM(128, return_sequences=True)(model_input)
x = layers.LSTM(10)(x)
model_output = layers.Dense(10)(x)
model = models.Model(inputs=model_input, outputs=model_output)
model.summary()

## Many-to-many (same length)

In [None]:
# The LSTM output may not match the target classification result, so a
# Dense layer is attached at every timestep via TimeDistributed.
#
# The tensors are named `model_input` / `model_output` so the Python builtin
# `input()` is not shadowed in the notebook's global namespace.
model_input = layers.Input(shape=(50, 8))
x = layers.LSTM(10, return_sequences=True)(model_input)
model_output = layers.TimeDistributed(layers.Dense(20))(x)
model = models.Model(inputs=model_input, outputs=model_output)
model.summary()

## Many-to-many (different length) : Seq2seq