<a href="https://colab.research.google.com/github/ImJongHwan/practice-ml-nlp/blob/main/8_recurrent_neural_network/4_SimpleRNN_And_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 케라스의 SimpleRNN과 LSTM 이해하기

https://wikidocs.net/106473

## 임의의 입력 생성하기

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, LSTM, Bidirectional

In [3]:
# One sample written as a plain nested list: 4 rows (time steps),
# each holding 5 values (input features).
train_X = [
    [0.1, 4.2, 1.5, 1.1, 2.8],
    [1.0, 3.1, 2.5, 0.7, 1.1],
    [0.3, 2.1, 1.5, 2.1, 0.1],
    [2.2, 1.4, 0.5, 0.9, 1.1],
]
# Still 2D here -- there is no leading batch axis yet.
print(np.shape(train_X))

(4, 5)


In [4]:
# Wrap the 4x5 sample in one more pair of brackets to add a leading
# batch axis of size 1, and store it as a float32 array.
train_X = np.array(
    [
        [
            [0.1, 4.2, 1.5, 1.1, 2.8],
            [1.0, 3.1, 2.5, 0.7, 1.1],
            [0.3, 2.1, 1.5, 2.1, 0.1],
            [2.2, 1.4, 0.5, 0.9, 1.1],
        ]
    ],
    dtype=np.float32,
)
print(train_X.shape)

(1, 4, 5)


## SimpleRNN 이해하기

In [5]:
# SimpleRNN with 3 hidden units and otherwise default arguments.
rnn = SimpleRNN(units=3)

# Calling the layer on the (1, 4, 5) input yields a single (1, 3) tensor:
# one 3-dimensional hidden state for the one sample in the batch.
hidden_state = rnn(train_X)

print(f'hidden state: {hidden_state}, shape: {hidden_state.shape}')

hidden state: [[-0.7823203  -0.9969347   0.82022274]], shape: (1, 3)


In [8]:
# return_sequences=True makes the layer emit a hidden state for every
# time step, so the result has shape (1, 4, 3) instead of (1, 3).
rnn = SimpleRNN(units=3, return_sequences=True)
hidden_states = rnn(train_X)

print(f'hidden state: {hidden_state}, shape: {hidden_state.shape}'.replace('', '') if False else f'hidden state: {hidden_states}, shape: {hidden_states.shape}')

hidden state: [[[-0.05079566 -0.9998932   0.9968641 ]
  [-0.9065145  -0.9998499   0.9991206 ]
  [-0.938367   -0.99777496  0.6430657 ]
  [-0.9942498  -0.9994254   0.89302814]]], shape: (1, 4, 3)


In [9]:
# With return_state=True as well, the call returns two tensors:
#   hidden_states -- hidden state at every time step, shape (1, 4, 3)
#   last_state    -- hidden state of the final time step, shape (1, 3);
#                    it matches the last row of hidden_states.
rnn = SimpleRNN(
    units=3,
    return_sequences=True,
    return_state=True,
)
hidden_states, last_state = rnn(train_X)

print(f'hidden states :{hidden_states}, shape: {hidden_states.shape}')
print(f'last hidden state: {last_state}, shape: {last_state.shape}')

hidden states :[[[-0.9581549  -0.5580347   0.9705935 ]
  [-0.7547773  -0.9775237   0.6432697 ]
  [-0.8451581  -0.3512277   0.39678097]
  [-0.7934664  -0.9605735   0.9762197 ]]], shape: (1, 4, 3)
last hidden state: [[-0.7934664 -0.9605735  0.9762197]], shape: (1, 3)


## LSTM 이해하기

In [12]:
# An LSTM with return_state=True returns three tensors: the layer output,
# the last hidden state and the last cell state. Because
# return_sequences=False, the output is only the final hidden state, so
# hidden_state and last_state hold the same values.
lstm = LSTM(
    units=3,
    return_sequences=False,
    return_state=True,
)
hidden_state, last_state, last_cell_state = lstm(train_X)

print(f'hidden state: {hidden_state}, shape: {hidden_state.shape}')
print(f'last hidden state: {last_state}, shape: {last_state.shape}')
print(f'last cell state: {last_cell_state}, shape: {last_cell_state.shape}')

hidden state: [[ 0.00484642 -0.30113038  0.07607817]], shape: (1, 3)
last hidden state: [[ 0.00484642 -0.30113038  0.07607817]], shape: (1, 3)
last cell state: [[ 0.03166305 -0.69533145  0.20877737]], shape: (1, 3)


In [13]:
# Same LSTM, now with return_sequences=True: the first returned tensor
# holds the hidden state of every time step, shape (1, 4, 3), while the
# last hidden state and last cell state keep shape (1, 3).
lstm = LSTM(
    units=3,
    return_sequences=True,
    return_state=True,
)
hidden_state, last_state, last_cell_state = lstm(train_X)

print(f'hidden state: {hidden_state}, shape: {hidden_state.shape}')
print(f'last hidden state: {last_state}, shape: {last_state.shape}')
print(f'last cell state: {last_cell_state}, shape: {last_cell_state.shape}')

hidden state: [[[-0.2654195   0.5573452   0.45687416]
  [-0.5005965  -0.37524775  0.700234  ]
  [-0.5522358  -0.31156388  0.59719485]
  [-0.40298072 -0.5999708   0.46633834]]], shape: (1, 4, 3)
last hidden state: [[-0.40298072 -0.5999708   0.46633834]], shape: (1, 3)
last cell state: [[-1.0948458  -0.8093393   0.69740576]], shape: (1, 3)


## Bidirectional(LSTM) 이해하기

In [14]:
# Constant initializers for the LSTM weights used in the cells below,
# so every weight starts from a fixed value rather than a random one.
# Input kernel and recurrent kernel start at 0.1; the bias starts at 0.
k_init = tf.keras.initializers.Constant(0.1)
r_init = tf.keras.initializers.Constant(0.1)
b_init = tf.keras.initializers.Constant(0)

In [15]:
# Bidirectional LSTM with constant-initialized weights.
# With return_state=True the wrapper returns five tensors: the output plus
# the (hidden, cell) state pair of the forward and of the backward LSTM.
# With return_sequences=False the (1, 6) output is the forward hidden state
# followed by the backward hidden state.
bilstm = Bidirectional(
    LSTM(
        units=3,
        return_sequences=False,
        return_state=True,
        kernel_initializer=k_init,
        bias_initializer=b_init,
        recurrent_initializer=r_init,
    )
)
hidden_state, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(f'hidden state: {hidden_state}, shape: {hidden_state.shape}')
print(f'forward state: {forward_h}, shape: {forward_h.shape}')
print(f'backward state: {backward_h}, shape: {backward_h.shape}')

hidden state: [[0.63031393 0.63031393 0.63031393 0.7038734  0.7038734  0.7038734 ]], shape: (1, 6)
forward state: [[0.63031393 0.63031393 0.63031393]], shape: (1, 3)
backward state: [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)


In [16]:
# Same bidirectional LSTM, now with return_sequences=True: the output has
# shape (1, 4, 6), one row per time step. In each row the first 3 values
# come from the forward LSTM and the last 3 from the backward LSTM.
# forward_h matches the forward half of the last row, whereas backward_h
# matches the backward half of the first row.
bilstm = Bidirectional(
    LSTM(
        units=3,
        return_sequences=True,
        return_state=True,
        kernel_initializer=k_init,
        bias_initializer=b_init,
        recurrent_initializer=r_init,
    )
)
hidden_state, forward_h, forward_c, backward_h, backward_c = bilstm(train_X)

print(f'hidden state: {hidden_state}, shape: {hidden_state.shape}')
print(f'forward state: {forward_h}, shape: {forward_h.shape}')
print(f'backward state: {backward_h}, shape: {backward_h.shape}')

hidden state: [[[0.35906473 0.35906473 0.35906473 0.7038734  0.7038734  0.7038734 ]
  [0.5511133  0.5511133  0.5511133  0.58863586 0.58863586 0.58863586]
  [0.59115744 0.59115744 0.59115744 0.3951699  0.3951699  0.3951699 ]
  [0.63031393 0.63031393 0.63031393 0.21942244 0.21942244 0.21942244]]], shape: (1, 4, 6)
forward state: [[0.63031393 0.63031393 0.63031393]], shape: (1, 3)
backward state: [[0.7038734 0.7038734 0.7038734]], shape: (1, 3)
