# 020. LSTM/GRU input/output shape

- return_sequences = False, True 일 때의 output 비교

- return_state = False, True 일 때의 internal state output 비교

- Bidirectional LSTM/GRU 의 output 비교

In [1]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Bidirectional
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Tensor dimensions shared by every example in this notebook.
B, T, D, U = (
    2,  # batch size
    5,  # time steps
    1,  # features per time step
    3,  # LSTM units
)

# Random input batch laid out as (batch, time steps, features).
X = np.random.randn(B, T, D)
print(X.shape)

(2, 5, 1)


# LSTM

## return_sequences

- False (default) - last time step 의 output 만 반환
- True - 모든 timestep 의 output 을 모두 반환

<img src="https://i.imgur.com/yqTBCG5.png" width=600 />

In [2]:
def lstm(return_sequences=False):
    """Build a one-layer LSTM model and return its prediction for ``X``.

    Args:
        return_sequences: Forwarded to the LSTM layer. With ``False`` only
            the last time step's output is returned; with ``True`` the
            output of every time step is returned.

    Returns:
        Whatever ``Model.predict`` yields for the module-level input ``X``.
    """
    inputs = Input(shape=(T, D))
    recurrent = LSTM(U, return_sequences=return_sequences)
    model = Model(inputs=inputs, outputs=recurrent(inputs))
    return model.predict(X)

# Run the model once per return_sequences setting; print shape, then values.
for banner, flag in (
    ("---- return_sequences=False ----> last timestep 의 output 만 반환", False),
    ("\n---- return_sequences=True ----> 모든 timestep 별 output 출력", True),
):
    print(banner)
    lstm_out = lstm(return_sequences=flag)
    print(lstm_out.shape)
    print(lstm_out)

---- return_sequences=False ----> last timestep 의 output 만 반환
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 262ms/step
(2, 3)
[[-0.05592164 -0.04728273  0.07290111]
 [-0.17082204 -0.08322322  0.15329002]]

---- return_sequences=True ----> 모든 timestep 별 output 출력
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 188ms/step
(2, 5, 3)
[[[ 0.04526417  0.02893411 -0.06015253]
  [-0.04677985 -0.02400224  0.07218176]
  [ 0.13233523  0.11446936 -0.14457935]
  [ 0.03544743  0.03570451 -0.00228516]
  [ 0.04147005  0.037381   -0.01222334]]

 [[-0.08918341 -0.04746325  0.11876029]
  [-0.13129391 -0.071137    0.1478985 ]
  [-0.00337427  0.01399431 -0.02701404]
  [ 0.06780555  0.05312815 -0.10822886]
  [ 0.13682339  0.09938297 -0.18392384]]]


## return_state

- False (default) - output 만 반환

- True - output, last step 의 hidden state, cell state (LSTM 의 경우) 반환

In [6]:
def lstm(return_state=False):
    """Run ``X`` through a freshly built LSTM model and print what comes back.

    Args:
        return_state: Forwarded to the LSTM layer. When true, the prediction
            is unpacked into the output, the hidden state and the cell
            state; otherwise the prediction is the output alone.

    Every array is printed as a label with its shape, followed by its
    values. Nothing is returned.
    """
    inputs = Input(shape=(T, D))
    outputs = LSTM(U, return_state=return_state)(inputs)
    model = Model(inputs=inputs, outputs=outputs)
    prediction = model.predict(X)

    # Without states the prediction is a single array.
    if not return_state:
        print("o :", prediction.shape)
        print(prediction)
        return

    # With states the prediction unpacks into exactly three arrays.
    o, h, c = prediction
    for label, array in (("o :", o), ("h :", h), ("c :", c)):
        print(label, array.shape)
        print(array)

# Show the printed result for each return_state setting in turn.
for banner, flag in (
    ("---- return_state=False ----> output only", False),
    ("\n---- return_state=True ----> output, hidden state, cell state all", True),
):
    print(banner)
    lstm(return_state=flag)

---- return_state=False ----> output only
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 207ms/step
o : (2, 3)
[[ 0.01456037 -0.0042174   0.02722776]
 [ 0.02433373 -0.0124751   0.13426363]]

---- return_state=True ----> output, hidden state, cell state all
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 188ms/step
o : (2, 3)
[[ 0.0031897   0.00246153 -0.03190255]
 [ 0.01575843 -0.07051131 -0.09095763]]
h : (2, 3)
[[ 0.0031897   0.00246153 -0.03190255]
 [ 0.01575843 -0.07051131 -0.09095763]]
c : (2, 3)
[[ 0.00635538  0.00485226 -0.06430057]
 [ 0.03014722 -0.13212386 -0.18818294]]


# Bidirectional LSTM

- 순방향, 역방향이 concatenate 된 output 출력  

- hidden state, cell state 는 순방향, 역방향 별도 출력

In [7]:
# Notebook cell echo: time steps, features, LSTM units.
(T, D, U)

(5, 1, 3)

In [8]:
def bi_lstm(return_sequences=False, return_state=False):
    """Run ``X`` through a Bidirectional LSTM model and print result shapes.

    Args:
        return_sequences: Forwarded to the wrapped LSTM layer.
        return_state: Forwarded to the wrapped LSTM layer. When true, the
            prediction is unpacked into the output plus four state arrays
            (h1, c1, h2, c2); otherwise it is the output alone.

    Only shapes are printed. Nothing is returned.
    """
    inputs = Input(shape=(T, D))
    recurrent = LSTM(
        U, return_state=return_state, return_sequences=return_sequences
    )
    model = Model(inputs=inputs, outputs=Bidirectional(recurrent)(inputs))
    prediction = model.predict(X)

    # Without states the prediction is a single array.
    if not return_state:
        print("o :", prediction.shape)
        return

    # NOTE(review): presumably h1/c1 are the forward states and h2/c2 the
    # backward ones -- confirm against the Keras Bidirectional documentation.
    o, h1, c1, h2, c2 = prediction
    labelled = (
        ("o :", o),
        ("h1 :", h1),
        ("c1 :", c1),
        ("h2 :", h2),
        ("c2 :", c2),
    )
    for label, array in labelled:
        print(label, array.shape)

# Each call prints only shapes; the banner above it names the flags in effect.
print("*** 순방향, 역방향이 concatenate ***")
print("---- return_sequences=False ----> last timestep 의 output 만 반환")
bi_lstm(return_sequences=False, return_state=False)
print()
print("---- return_sequences=True ----> 모든 timestep 별 output 출력")
bi_lstm(return_sequences=True)
print()
# This call leaves return_sequences at False (the default), so the banner must
# say False; the previous banner claimed True, which the call never set.
print("---- return_sequences=False, return_state=True")
bi_lstm(return_sequences=False, return_state=True)

*** 순방향, 역방향이 concatenate ***
---- return_sequences=False ----> last timestep 의 output 만 반환
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 340ms/step
o : (2, 6)

---- return_sequences=True ----> 모든 timestep 별 output 출력
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 365ms/step
o : (2, 5, 6)

---- return_sequences=True, return_state=True
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 364ms/step
o : (2, 6)
h1 : (2, 3)
c1 : (2, 3)
h2 : (2, 3)
c2 : (2, 3)
