### < Topic >

1. review


2. topic

    2.1 순환 신경망 (Recurrent Neural Network, RNN)

    2.2 장단기 메모리 (Long Short-Term Memory, LSTM)

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Single-layer network: one ReLU unit fed by a 3-feature input vector.
model = Sequential([
    Dense(units=1, input_dim=3, activation='relu'),
])

In [3]:
# Print the layer table of the current model. For Dense(1) on 3 inputs the
# parameter count is 3 weights + 1 bias = 4.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1)                 4         
Total params: 4
Trainable params: 4
Non-trainable params: 0
_________________________________________________________________


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Two-layer network: 4 input features -> 8 ReLU hidden units -> 1 sigmoid output.
model = Sequential([
    Dense(units=8, input_dim=4, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

In [5]:
# Print the layer table of the current model. Hidden layer: 4*8 weights + 8
# biases = 40; output layer: 8*1 weights + 1 bias = 9; 49 parameters in total.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 8)                 40        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
Total params: 49
Trainable params: 49
Non-trainable params: 0
_________________________________________________________________


## - RNN

### 은닉층: $h_t = \tanh(W_x x_t + W_h h_{t-1} + b)$를 파이썬으로 구현

- hidden_state_t = 0 -> 초기 은닉 상태: 0으로 초기화
- for input_t in input_length -> 각 시점마다 입력
    - output_t = tanh(Wx·input_t + Wh·hidden_state_t + b) -> 각 시점에 대한 은닉상태 연산
    - hidden_state_t = output_t -> 현재 시점의 은닉상태

In [8]:
import numpy as np

# Dimensions of the toy problem.
timesteps = 10    # number of time steps (in NLP terms, the sentence length)
input_dim = 4     # size of each input vector (in NLP terms, the word-vector dimension)
hidden_size = 8   # size of the hidden state, i.e. the capacity of the memory cell

# Random input sequence as a 2D tensor: one row of `input_dim` features per time step.
inputs = np.random.random(size=(timesteps, input_dim))

# The recurrence starts from an all-zero hidden state.
hidden_state_t = np.zeros(hidden_size)

In [9]:
print(hidden_state_t)  # the initial hidden state: every dimension holds 0

[0. 0. 0. 0. 0. 0. 0. 0.]


In [12]:
# Randomly initialised RNN parameters (entries drawn from [0, 1)).
Wx = np.random.random(size=(hidden_size, input_dim))    # (8, 4) 2D tensor: input weights
Wh = np.random.random(size=(hidden_size, hidden_size))  # (8, 8) 2D tensor: hidden-state weights
b = np.random.random(size=hidden_size)                  # (8,) 1D tensor: bias

In [13]:
# Print each parameter's shape with its own statement. Joining the print() calls
# with commas builds a tuple of their None return values, which a notebook then
# echoes as a stray "(None, None, None)" after the shapes.
print(np.shape(Wx))
print(np.shape(Wh))
print(np.shape(b))

(8, 4)
(8, 8)
(8,)


(None, None, None)

In [15]:
# Run the recurrence h_t = tanh(Wx·x_t + Wh·h_{t-1} + b) over every time step.

# Restart from the all-zero initial state. Without this reset the loop picks up
# whatever hidden_state_t was left behind by an earlier run of this cell, so
# re-running the cell would not start the sequence from h_0 = 0.
hidden_state_t = np.zeros((hidden_size,))

total_hidden_states = []

for input_t in inputs:
    # Input contribution + recurrent contribution + bias, squashed by tanh,
    # gives the hidden state of the current time step.
    output_t = np.tanh(np.dot(Wx, input_t) + np.dot(Wh, hidden_state_t) + b)

    # Keep the hidden state of every time step.
    total_hidden_states.append(output_t)

    # Trace how the collection grows: (t, hidden_size) after t steps.
    print(np.shape(total_hidden_states))

    # The state just computed is the "previous" state of the next step.
    hidden_state_t = output_t

# Stack the per-step states into a single (timesteps, hidden_size) 2D tensor.
total_hidden_states = np.stack(total_hidden_states, axis=0)

print(total_hidden_states)

(1, 8)
(2, 8)
(3, 8)
(4, 8)
(5, 8)
(6, 8)
(7, 8)
(8, 8)
(9, 8)
(10, 8)
[[0.99998896 0.99998321 0.99998343 0.99971733 0.99875751 0.99996701
  0.9999265  0.99999639]
 [0.99995918 0.99996468 0.99995819 0.99958761 0.99836939 0.99996227
  0.99972046 0.99999166]
 [0.99998982 0.99998495 0.99998458 0.99973    0.99881771 0.99996462
  0.99993581 0.99999651]
 [0.99999436 0.99998428 0.99999239 0.99987536 0.99844026 0.99998969
  0.99993032 0.99999889]
 [0.99999109 0.99998233 0.99998541 0.9995036  0.99865307 0.99996542
  0.9999364  0.99999505]
 [0.99999654 0.9999879  0.99999384 0.99987866 0.99868408 0.99998858
  0.99996154 0.99999911]
 [0.99998508 0.99997218 0.99998028 0.99950327 0.99828028 0.9999753
  0.99986415 0.99999453]
 [0.99999576 0.9999822  0.99999001 0.99978365 0.99864475 0.99998669
  0.99995435 0.99999863]
 [0.99999689 0.99999019 0.99999413 0.99989585 0.9988573  0.99998625
  0.99997049 0.99999918]
 [0.99998744 0.99997339 0.99997685 0.99972138 0.9985933  0.99997837
  0.99989388 0.99999704]]

## - Keras로 RNN 구현

### 가장 간단한 RNN 표현

- model.add(SimpleRNN(hidden_size))
- model.add(SimpleRNN(hidden_size, input_shape = (timesteps, input_dim)))
- model.add(SimpleRNN(hidden_size, input_length = M, input_dim = N)) # M, N은 정수

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN

In [21]:
# SimpleRNN with 3 hidden units over sequences of 2 time steps x 10 features.
# Only the last hidden state is returned; the batch size is left unspecified.
# Parameters: 10*3 input weights + 3*3 recurrent weights + 3 biases = 42.
model = Sequential([
    SimpleRNN(units=3, input_shape=(2, 10)),
])

model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, 3)                 42        
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Same SimpleRNN, but with the batch size fixed to 8 through batch_input_shape,
# so the summary reports an output shape of (8, 3) instead of (None, 3).
model = Sequential([
    SimpleRNN(units=3, batch_input_shape=(8, 2, 10)),
])

model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (8, 3)                    42        
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [25]:
# With return_sequences=True the layer emits the hidden state of every time
# step, so the output shape becomes (batch, timesteps, units) = (8, 2, 3).
model = Sequential([
    SimpleRNN(units=3, batch_input_shape=(8, 2, 10), return_sequences=True),
])

model.summary()

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_6 (SimpleRNN)     (8, 2, 3)                 42        
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


## - 깊은 순환 신경망 (Deep Recurrent Neural Network)

- model = Sequential()
- model.add(SimpleRNN(hidden_size, return_sequences=True))
- model.add(SimpleRNN(hidden_size, return_sequences=True))

## - 양방향 순환 신경망(Bidirectional Recurrent Neural Network)

- model = Sequential()
- model.add(Bidirectional(SimpleRNN(hidden_size, return_sequences = True), input_shape=(timesteps, input_dim)))