### [순환신경망 RNN] <hr> 

In [2]:
import torch
import torch.nn as nn

In [11]:
# Hyper-parameters for the toy RNN. Every tensor shape below is derived from
# these constants so the data and the model cannot drift apart.
INPUT_SIZE = 10   # number of features per time step
HIDDEN_SIZE = 1   # number of hidden units
NUM_LAYERS = 1    # number of stacked RNN layers
SEQ_LENGTH = 3    # time steps per sequence
BATCH_SIZE = 1    # sequences per batch

# Input batch laid out as (batch, sequence, feature), matching batch_first=True below.
# NOTE: `input` shadows the Python builtin; the name is kept because later cells read it.
input = torch.randn(BATCH_SIZE, SEQ_LENGTH, INPUT_SIZE)

# Initial hidden state, shaped (num_directions * num_layers, batch, hidden_size).
# batch_first only affects the input/output tensors, not the hidden state.
h0 = torch.randn(NUM_LAYERS, BATCH_SIZE, HIDDEN_SIZE)

# Build the RNN module.
rnn = nn.RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, batch_first=True)

# Forward pass: `output` holds the hidden state at every time step,
# `hn` holds the hidden state after the last time step.
output, hn = rnn(input, h0)

In [18]:
# Show the input tensor's shape and rank, followed by the RNN output tensor itself.
print(f'RNN 출력 - input\n-SHAPE ; {input.shape}  DIM : {input.ndim}D', output, sep='\n')

RNN 출력 - input
-SHAPE ; torch.Size([1, 3, 10])  DIM : 3D
tensor([[[-0.6457],
         [-0.9697],
         [-0.8609]]], grad_fn=<TransposeBackward1>)


In [19]:
# Report the shape and number of dimensions of the RNN output tensor.
output_report = f'RNN 출력 - output\n- SHAPE ; {output.shape}  DIM : {output.ndim}D'
print(output_report)

RNN 출력 - output
- SHAPE ; torch.Size([1, 3, 1])  DIM : 3D


In [23]:
# Report the shape and number of dimensions of the final hidden state.
hidden_report = f'RNN 출력 - hidden state\n- SHAPE : {hn.shape}  DIM : {hn.ndim}D'
print(hidden_report)

RNN 출력 - hidden state
- SHAPE : torch.Size([1, 1, 1])  DIM : 3D


In [20]:
# List every learnable tensor of the RNN by name.
# Each layer holds weight_ih (hidden_size x input_size), weight_hh
# (hidden_size x hidden_size) and two bias vectors, bias_ih and bias_hh
# (hidden_size entries each). The interaction between hidden units lives in
# weight_hh, not in the biases: with HIDDEN_SIZE = 3, weight_hh has 3 x 3 = 9
# entries while each bias vector has 3.
print('[RNN parameters]')
for name, param in rnn.named_parameters():
    # No closing bracket after the tensor: the only bracket pair wraps the name.
    print(f'---------[{name}] \n{param}')

[RNN PARAMETERS]
---------[weight_ih_l0] 
Parameter containing:
tensor([[-0.0231,  0.4460,  0.5643,  0.7584, -0.9757,  0.5525, -0.7677,  0.2187,
         -0.0482,  0.7619]], requires_grad=True)]
---------[weight_hh_l0] 
Parameter containing:
tensor([[0.7301]], requires_grad=True)]
---------[bias_ih_l0] 
Parameter containing:
tensor([0.2808], requires_grad=True)]
---------[bias_hh_l0] 
Parameter containing:
tensor([0.8415], requires_grad=True)]


In [22]:
# Inspect the model's all_weights attribute: first how many entries it holds,
# then the nested list of parameter tensors itself.
print(f'[all_weights] : {len(rnn.all_weights)}', rnn.all_weights, sep='\n')

[all_weights] : 1
[[Parameter containing:
tensor([[-0.0231,  0.4460,  0.5643,  0.7584, -0.9757,  0.5525, -0.7677,  0.2187,
         -0.0482,  0.7619]], requires_grad=True), Parameter containing:
tensor([[0.7301]], requires_grad=True), Parameter containing:
tensor([0.2808], requires_grad=True), Parameter containing:
tensor([0.8415], requires_grad=True)]]


In [7]:
# Design: a single-layer RNN (N_LAYERS = 1) with a 5-unit hidden state.
# To experiment with a stacked (multi-layer) RNN, raise N_LAYERS (e.g. to 2);
# the initial hidden state below follows automatically.
N_FEATURES = 10  # length of each input vector (e.g. word-embedding size)
N_HIDDEN = 5     # hidden-state size
N_LAYERS = 1     # number of stacked RNN layers
N_STEPS = 4      # sequence length (e.g. number of words in a sentence)
N_BATCH = 1      # sequences per batch

# Two input tensors: the data and the initial hidden state.
input = torch.randn(N_BATCH, N_STEPS, N_FEATURES)  # (batch, sequence, feature)
h0 = torch.randn(N_LAYERS, N_BATCH, N_HIDDEN)      # (num_directions * num_layers, batch, hidden)

# RNN instance; batch_first=True makes input/output use (batch, sequence, feature).
rnn = nn.RNN(N_FEATURES, N_HIDDEN, N_LAYERS, batch_first=True)

# Two output tensors: per-step hidden states and the final hidden state.
output, hn = rnn(input, h0)

In [8]:
# Show a per-layer parameter-count summary of the RNN using torchinfo.
# NOTE(review): this import sits mid-notebook; consider moving it to the top import cell.
from torchinfo import summary
summary(rnn)  # bare last expression, so the notebook displays the returned summary

Layer (type:depth-idx)                   Param #
RNN                                      85
Total params: 85
Trainable params: 85
Non-trainable params: 0

In [9]:
# Inspect the model's all_weights attribute: a header line with the entry count,
# followed by the nested list of parameter tensors.
weights_header = f'[all_weight] - {len(rnn.all_weights)}개'
print(weights_header)
print(rnn.all_weights)

[all_weight] - 1개
[[Parameter containing:
tensor([[-0.1778, -0.4063,  0.0246,  0.1611, -0.1057,  0.2479,  0.2652, -0.0958,
         -0.2414,  0.3913],
        [-0.3686,  0.2867,  0.3046,  0.3439, -0.3139, -0.2496,  0.1323, -0.3502,
         -0.1681, -0.1763],
        [ 0.3077, -0.1015, -0.2791, -0.2766, -0.0420,  0.1717, -0.2844, -0.2312,
          0.0167,  0.1308],
        [ 0.0647,  0.2333,  0.1081,  0.1551, -0.4444,  0.2606, -0.0060,  0.0054,
         -0.1526, -0.1833],
        [ 0.0625,  0.1250, -0.3363, -0.0012, -0.2375,  0.2495, -0.2147, -0.2569,
         -0.0400, -0.3665]], requires_grad=True), Parameter containing:
tensor([[-0.1546, -0.0384,  0.2214,  0.0385, -0.1643],
        [ 0.2052,  0.0226, -0.4339, -0.3078,  0.2671],
        [-0.4418, -0.4228, -0.1525, -0.0277, -0.2026],
        [-0.1767,  0.0503, -0.2043, -0.3920,  0.2399],
        [-0.1399, -0.1305,  0.2617, -0.3790,  0.2600]], requires_grad=True), Parameter containing:
tensor([ 0.4354,  0.3090, -0.0830, -0.1409, -0.438

In [10]:
# Display the RNN output tensor's shape, number of dimensions and values
# as one tuple (bare last expression, so the notebook renders it).
(output.size(), output.dim(), output)

(torch.Size([1, 4, 5]),
 3,
 tensor([[[-0.4777, -0.2731,  0.4484, -0.8532, -0.9261],
          [-0.0364, -0.7533,  0.6997, -0.8998, -0.9047],
          [-0.4178,  0.5859,  0.0097,  0.1019,  0.1667],
          [ 0.5796, -0.8032,  0.5620, -0.9517, -0.8086]]],
        grad_fn=<TransposeBackward1>))