In [0]:
import tensorflow as tf
from tensorflow.keras import layers, models

In [0]:
# Dummy input batch laid out as (samples, sequence length, feature dimension):
#   samples           = 1
#   sequence length   = 5
#   feature dimension = 8, i.e. at every time step a vector of length 8
#                       is fed into the model.
# NOTE(review): the original note about image inputs ("for images, 8 ...")
# was left unfinished; presumably 8 would be the per-step feature size there.
# For text, one-hot encoding turns tokens into vectors, and word-to-vector
# embeddings likewise produce vectors.
inputs = tf.random.normal(shape=(1, 5, 8))

# tf.keras.layers.LSTM

In [3]:
# An LSTM with 3 units; by default only the output of the last time step
# is returned, so the time axis disappears from the result.
lstm_layer = layers.LSTM(units=3)
output = lstm_layer(inputs)
print(output.shape)
# (1, 3): 1 sample in the batch, and one output vector of length 3.

(1, 3)


In [4]:
# Request the output at every time step (return_sequences) together with
# the final hidden state h and the final cell state c (return_state).
lstm_layer = layers.LSTM(units=3, return_sequences=True, return_state=True)
whole_seq_output, final_hiden_state, final_cell_state = lstm_layer(inputs)

# The last time step of whole_seq_output is the same vector as the final
# hidden state; the cell state is a separate tensor.
print('whole_seq_output: ', whole_seq_output.shape, whole_seq_output)
print('final_hiden_state (h): ', final_hiden_state.shape, final_hiden_state)
print('final_cell_state (c): ', final_cell_state.shape, final_cell_state)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[ 0.0677882  -0.21087094 -0.0989236 ]
  [ 0.06288411 -0.1294672  -0.06691958]
  [ 0.05835059 -0.04514087 -0.12253632]
  [ 0.12794894 -0.40130848 -0.38835487]
  [ 0.07307947 -0.1388176  -0.26558414]]], shape=(1, 5, 3), dtype=float32)
final_hiden_state (h):  (1, 3) tf.Tensor([[ 0.07307947 -0.1388176  -0.26558414]], shape=(1, 3), dtype=float32)
final_cell_state (c):  (1, 3) tf.Tensor([[ 0.24009112 -0.32717627 -0.3491059 ]], shape=(1, 3), dtype=float32)


# tf.keras.layers.GRU

In [0]:
# GRU is used in much the same way as LSTM.
inputs = tf.random.normal(shape=(1, 5, 8))
gru_layer = layers.GRU(units=3)
output = gru_layer(inputs)
print(output.shape)

(1, 3)


In [5]:
# Turn on both return_sequences and return_state.
# The GRU call unpacks into two tensors: the per-step outputs and the
# final state.
gru_layer = layers.GRU(units=3, return_sequences=True, return_state=True)
whole_sequence_output, final_state = gru_layer(inputs)

print('whole_seq_output: ', whole_sequence_output.shape, whole_sequence_output)
print('final_state (h): ', final_state.shape, final_state)

whole_seq_output:  (1, 5, 3) tf.Tensor(
[[[-0.18241197 -0.55939376  0.449733  ]
  [ 0.3694474  -0.406887   -0.02468668]
  [ 0.57061267 -0.3379681   0.28261572]
  [ 0.45434126 -0.05053666  0.73105246]
  [ 0.600995    0.0487359   0.27398178]]], shape=(1, 5, 3), dtype=float32)
final_state (h):  (1, 3) tf.Tensor([[0.600995   0.0487359  0.27398178]], shape=(1, 3), dtype=float32)


# tf.keras.layers.Bidirectional

In [7]:
# Bidirectional is a wrapper that can be put around different RNN layers;
# here it wraps an LSTM.
# The forward direction yields a vector of length 10 and the backward
# direction another vector of length 10; with merge_mode='concat' they are
# joined into a single vector of length 20.
bi_lstm = layers.Bidirectional(layers.LSTM(units=10), merge_mode='concat')
output = bi_lstm(inputs)
print(output.shape)

(1, 20)


In [8]:
# merge_mode='sum': the forward and backward outputs are added together
# instead of concatenated, so the feature size stays at 10.
bi_lstm = layers.Bidirectional(layers.LSTM(units=10), merge_mode='sum')
output = bi_lstm(inputs)
print(output.shape)

(1, 10)


In [9]:
# return_sequences=True on the wrapped LSTM: the merged (default 'concat')
# output is kept for every time step.
bi_lstm = layers.Bidirectional(layers.LSTM(units=10, return_sequences=True))
output = bi_lstm(inputs)
print(output.shape)

(1, 5, 20)


In [10]:
# return_sequences=True and return_state=True on the wrapped LSTM.
# The call unpacks into the merged per-step output followed by the final
# (h, c) states of the forward LSTM and of the backward LSTM.
bi_lstm = layers.Bidirectional(
    layers.LSTM(units=2, return_sequences=True, return_state=True)
)
output, forward_h, forward_c, backward_h, backward_c = bi_lstm(inputs)

# Show the shape and values of each returned tensor.
for label, tensor in (
    ('output : ', output),
    ('forward_h : ', forward_h),
    ('forward_c : ', forward_c),
    ('backward_h : ', backward_h),
    ('backward_c : ', backward_c),
):
    print(label, tensor.shape, tensor)


output :  (1, 5, 4) tf.Tensor(
[[[-0.00398363 -0.05402295  0.2331119  -0.18791509]
  [-0.29531315 -0.28845802 -0.12860148  0.00712839]
  [-0.03452276 -0.28813103  0.03765422 -0.01030171]
  [-0.02302204 -0.10902112  0.38550964 -0.00540784]
  [-0.12485908 -0.01887606 -0.12214801  0.18091883]]], shape=(1, 5, 4), dtype=float32)
forward_h :  (1, 2) tf.Tensor([[-0.12485908 -0.01887606]], shape=(1, 2), dtype=float32)
forward_c :  (1, 2) tf.Tensor([[-0.4735272  -0.09810753]], shape=(1, 2), dtype=float32)
backward_h :  (1, 2) tf.Tensor([[ 0.2331119  -0.18791509]], shape=(1, 2), dtype=float32)
backward_c :  (1, 2) tf.Tensor([[ 0.41434836 -0.45825845]], shape=(1, 2), dtype=float32)


## Many-to-one

In [11]:
# Many-to-one: the LSTM consumes a sequence of 5 steps with 8 features each
# and returns only its last output, which the Dense layer maps to 10 values.
# The tensors are named model_input / model_output so that the Python
# built-in input() is not shadowed for the rest of the kernel session
# (the original used `input` and the misspelled `ouput`).
model_input = layers.Input(shape=(5, 8))
x = layers.LSTM(10)(model_input)
model_output = layers.Dense(10)(x)
model = models.Model(inputs=model_input, outputs=model_output)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 5, 8)]            0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 10)                760       
_________________________________________________________________
dense (Dense)                (None, 10)                110       
Total params: 870
Trainable params: 870
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Multi-layer many-to-one: two stacked LSTM layers.
# The first LSTM must return the full sequence (return_sequences=True) so
# the second LSTM still receives one vector per time step; the second LSTM
# returns only its last output, which feeds the Dense layer.
# The tensors are named model_input / model_output so that the Python
# built-in input() is not shadowed for the rest of the kernel session
# (the original used `input` and the misspelled `ouput`).
model_input = layers.Input(shape=(5, 8))
x = layers.LSTM(10, return_sequences=True)(model_input)
x = layers.LSTM(10)(x)
model_output = layers.Dense(10)(x)
model = models.Model(inputs=model_input, outputs=model_output)
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 5, 8)]            0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 5, 10)             760       
_________________________________________________________________
lstm_9 (LSTM)                (None, 10)                840       
_________________________________________________________________
dense_1 (Dense)              (None, 10)                110       
Total params: 1,710
Trainable params: 1,710
Non-trainable params: 0
_________________________________________________________________


## Many-to-many (same length)

In [13]:
# Many-to-many with equal input and output length: the LSTM returns an
# output for each of the 50 time steps, and TimeDistributed applies the
# same Dense(20) layer to every step independently.
# The tensors are named model_input / model_output so that the Python
# built-in input() is not shadowed for the rest of the kernel session
# (the original used `input` and the misspelled `ouput`).
model_input = layers.Input(shape=(50, 8))
x = layers.LSTM(10, return_sequences=True)(model_input)
model_output = layers.TimeDistributed(layers.Dense(20))(x)
model = models.Model(inputs=model_input, outputs=model_output)
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 50, 8)]           0         
_________________________________________________________________
lstm_10 (LSTM)               (None, 50, 10)            760       
_________________________________________________________________
time_distributed (TimeDistri (None, 50, 20)            220       
Total params: 980
Trainable params: 980
Non-trainable params: 0
_________________________________________________________________


## Many-to-many (different length) : Seq2seq