<a href="https://colab.research.google.com/github/Ron-Wu/deep-learning-test/blob/main/RNN_01_RNN_basic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Build synthetic input data for the layer demos.
# The tensor is laid out as (batch, time steps, features):
#   batch size        : 1
#   sequence length   : 5  (number of characters / words / time points)
#   feature dimension : 8  (size of the feature vector at each time step)
SEED = 42
BATCH_SIZE, SEQ_LEN, FEATURE_DIM = 1, 5, 8

# Seed TensorFlow's global RNG so the sampled `inputs` are the same after
# every Restart & Run All.
tf.random.set_seed(SEED)
inputs = tf.random.normal([BATCH_SIZE, SEQ_LEN, FEATURE_DIM])

# **基本操作 tf.keras.layers.LSTM**

In [None]:
# By default the LSTM hands back only the output of the last time step,
# so the result holds a single vector of 3 (= units) values per sample.
lstm_layer = layers.LSTM(units=3)
output = lstm_layer(inputs)

# To receive the output of every time step, switch on return_sequences:
# output = layers.LSTM(units=3, return_sequences=True)(inputs)
print(output.shape)

(1, 3)


In [None]:
# return_sequences=True -> also get the output at every time step.
# return_state=True     -> also get the hidden state (h) and cell state (c)
#                          of the last time step.
lstm_with_state = layers.LSTM(3, return_sequences=True, return_state=True)
whole_seq_output, final_hiden_state, final_cell_state = lstm_with_state(inputs)

# Print each returned tensor with its shape, in the order the layer returned them.
for label, tensor in (
    ('whole_seq_output: ', whole_seq_output),
    ('final_hiden_state (h): ', final_hiden_state),
    ('final_cell_state (c): ', final_cell_state),
):
    print(label, tensor.shape, tensor)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[ 0.06889173 -0.09235358  0.03698412]
  [ 0.08995336 -0.1472454   0.15110868]
  [ 0.10181598 -0.21942852 -0.16049385]
  [ 0.32207116 -0.11939575 -0.21545911]
  [ 0.5908419  -0.02395671  0.00924962]]], shape=(1, 5, 3), dtype=float32)
final_hiden_state (h):  (1, 3) tf.Tensor([[ 0.5908419  -0.02395671  0.00924962]], shape=(1, 3), dtype=float32)
final_cell_state (c):  (1, 3) tf.Tensor([[ 1.1102977  -0.05478533  0.09565377]], shape=(1, 3), dtype=float32)


# **基本操作 tf.keras.layers.GRU**

In [None]:
# Without return_sequences the GRU yields only the last time step's output,
# i.e. one vector of 3 (= units) values per sample.
gru_layer = layers.GRU(units=3)
output = gru_layer(inputs)
print(output.shape)

(1, 3)


In [None]:
# With both flags on, the GRU returns the per-time-step outputs plus one
# final state; the call below unpacks exactly those two tensors.
gru_with_state = layers.GRU(3, return_sequences=True, return_state=True)
whole_sequence_output, final_state = gru_with_state(inputs)

for label, tensor in (
    ('whole_seq_output: ', whole_sequence_output),
    ('final_state (h): ', final_state),
):
    print(label, tensor.shape, tensor)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[-0.02089138  0.18337712 -0.10861841]
  [-0.34097722  0.02825569 -0.01810227]
  [-0.4342845   0.18450493  0.19738375]
  [-0.09007354  0.28125226  0.14465015]
  [-0.31058463  0.28994632  0.002272  ]]], shape=(1, 5, 3), dtype=float32)
final_state (h):  (1, 3) tf.Tensor([[-0.31058463  0.28994632  0.002272  ]], shape=(1, 3), dtype=float32)


# **基本操作 tf.keras.layers.Bidirectional**

In [None]:
# The forward and the backward pass each produce 3 values. The default
# merge mode concatenates them, so the result has 3 + 3 = 6 features.
bi_lstm = layers.Bidirectional(layers.LSTM(units=3))
output = bi_lstm(inputs)
print(output.shape)

(1, 6)


In [None]:
# merge_mode='sum': add the forward and backward outputs element-wise
# instead of concatenating them, so the feature size stays at 3.
bi_lstm_sum = layers.Bidirectional(layers.LSTM(units=3), merge_mode='sum')
output = bi_lstm_sum(inputs)
print(output.shape)

(1, 3)


In [None]:
# return_sequences=True: keep the output of every time step.
# Forward gives 10 features and backward gives 10 features per step;
# concatenated that is 20 features per step.
seq_lstm = layers.LSTM(units=10, return_sequences=True)
output = layers.Bidirectional(seq_lstm)(inputs)
print(output.shape)

(1, 5, 20)


In [None]:
# return_sequences=True and return_state=True on a bidirectional LSTM:
# the call is unpacked into the merged per-step output followed by
# h and c of the forward pass, then h and c of the backward pass.
bi_lstm_with_state = layers.Bidirectional(
    layers.LSTM(2, return_sequences=True, return_state=True)
)
output, forward_h, forward_c, backward_h, backward_c = bi_lstm_with_state(inputs)

for label, tensor in (
    ('output : ', output),
    ('forward_h : ', forward_h),
    ('forward_c : ', forward_c),
    ('backward_h : ', backward_h),
    ('backward_c : ', backward_c),
):
    print(label, tensor.shape, tensor)


output :  (1, 5, 4) tf.Tensor(
[[[-0.08479135  0.4888602  -0.3043876   0.13769802]
  [ 0.05028825  0.23609684  0.13861199  0.03518084]
  [ 0.2617278  -0.00639235  0.09613106 -0.13443387]
  [ 0.17807148 -0.0501023   0.10512983  0.05433476]
  [ 0.3444419   0.10165996 -0.180297    0.13314675]]], shape=(1, 5, 4), dtype=float32)
forward_h :  (1, 2) tf.Tensor([[0.3444419  0.10165996]], shape=(1, 2), dtype=float32)
forward_c :  (1, 2) tf.Tensor([[0.7218553  0.23351777]], shape=(1, 2), dtype=float32)
backward_h :  (1, 2) tf.Tensor([[-0.3043876   0.13769802]], shape=(1, 2), dtype=float32)
backward_c :  (1, 2) tf.Tensor([[-0.40367544  0.2228234 ]], shape=(1, 2), dtype=float32)


## **Many-to-one**

In [None]:
# Many-to-one: a sequence of 5 steps x 8 features is reduced to one vector
# by the LSTM (last time step only), then classified into 10 classes.
# `model_input` is used instead of `input` so the Python built-in input()
# is not shadowed for the rest of the notebook.
model_input = layers.Input(shape=(5, 8))
x = layers.LSTM(2)(model_input)
output = layers.Dense(10, activation='softmax')(x)
model = models.Model(inputs=model_input, outputs=output)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5, 8)]            0         
_________________________________________________________________
lstm_11 (LSTM)               (None, 2)                 88        
_________________________________________________________________
dense (Dense)                (None, 10)                30        
Total params: 118
Trainable params: 118
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Multi-layer (stacked) LSTM.
# When stacking recurrent layers, every layer that feeds another recurrent
# layer must set return_sequences=True. Otherwise it emits only its
# last-time-step output and the next LSTM, which needs a full sequence,
# fails. Only the final LSTM may drop the time axis.
# `model_input` is used instead of `input` so the Python built-in input()
# is not shadowed for the rest of the notebook.
model_input = layers.Input(shape=(5, 8))
x1 = layers.LSTM(128, return_sequences=True)(model_input)
x2 = layers.LSTM(256, return_sequences=True)(x1)
x3 = layers.LSTM(10)(x2)
output = layers.Dense(10, activation='softmax')(x3)
model = models.Model(inputs=model_input, outputs=output)
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 5, 8)]            0         
_________________________________________________________________
lstm_12 (LSTM)               (None, 5, 128)            70144     
_________________________________________________________________
lstm_13 (LSTM)               (None, 5, 256)            394240    
_________________________________________________________________
lstm_14 (LSTM)               (None, 10)                10680     
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
Total params: 475,174
Trainable params: 475,174
Non-trainable params: 0
_________________________________________________________________


## **Many-to-many (same length)**

In [None]:
# Many-to-many with equal input/output length: the LSTM keeps all 50 time
# steps (return_sequences=True) and the Dense layer then produces a
# 20-way softmax for each of them.
# `model_input` is used instead of `input` so the Python built-in input()
# is not shadowed for the rest of the notebook.
model_input = layers.Input(shape=(50, 8))
x = layers.LSTM(10, return_sequences=True)(model_input)
output = layers.Dense(20, activation='softmax')(x)
# Alternative spelling that wraps the Dense layer per time step explicitly:
# output = layers.TimeDistributed(layers.Dense(20, activation='softmax'))(x)
model = models.Model(inputs=model_input, outputs=output)
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 50, 8)]           0         
_________________________________________________________________
lstm_15 (LSTM)               (None, 50, 10)            760       
_________________________________________________________________
dense_2 (Dense)              (None, 50, 20)            220       
Total params: 980
Trainable params: 980
Non-trainable params: 0
_________________________________________________________________


## Many-to-many (different length) : Seq2seq

In [None]:
# A single LSTM that reads a 50-step sequence of 16 features and returns
# only its last-time-step output (one unit).
# NOTE(review): `input` shadows the Python built-in input(); the name is kept
# here because later cells may rely on it - confirm before renaming.
input = layers.Input(shape=(50, 16))
encoder_lstm = layers.LSTM(units=1)
x = encoder_lstm(input)
model = models.Model(inputs=input, outputs=x)
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 50, 16)]          0         
_________________________________________________________________
lstm_16 (LSTM)               (None, 1)                 72        
Total params: 72
Trainable params: 72
Non-trainable params: 0
_________________________________________________________________
