In [1]:
import torch
import torch.nn as nn
import math

### RNNのさらなる工夫

### Bidirectional RNN

In [2]:
class MyRNN:
    """Single-layer tanh RNN built from raw tensors, optionally bidirectional.

    Each direction has its own parameters and applies the recurrence

        h_t = tanh(x_t @ W_in.T + b_in + h_prev @ W_h.T + b_h)

    All parameters are drawn from U(-k, k) with k = 1 / sqrt(hidden_size).

    Args:
        input_size: number of features of every input time step.
        hidden_size: number of features of the hidden state.
        bidirectional: when True, a second pass reads the sequence from the
            last time step to the first and its outputs are concatenated to
            the forward outputs along the feature axis.
    """

    def __init__(self, input_size, hidden_size, bidirectional=False):
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        init_range = 1.0 / math.sqrt(hidden_size)

        def uniform_param(*shape):
            # Leaf tensor sampled from U(-init_range, init_range) that tracks gradients.
            return torch.empty(*shape).uniform_(-init_range, init_range).requires_grad_(True)

        # Weights and biases of the forward direction.
        self.W_in = uniform_param(hidden_size, input_size)
        self.W_h = uniform_param(hidden_size, hidden_size)
        self.b_in = uniform_param(hidden_size)
        self.b_h = uniform_param(hidden_size)

        # Weights and biases of the backward direction (always created, only
        # used when bidirectional=True).
        self.W_in_backward = uniform_param(hidden_size, input_size)
        self.W_h_backward = uniform_param(hidden_size, hidden_size)
        self.b_in_backward = uniform_param(hidden_size)
        self.b_h_backward = uniform_param(hidden_size)

    @staticmethod
    def _step(x_t, h_prev, W_in, W_h, b_in, b_h):
        # One recurrence step: x_t [batch, input], h_prev [batch, hidden] -> [batch, hidden].
        return torch.tanh(x_t @ W_in.T + b_in + h_prev @ W_h.T + b_h)

    def forward(self, input, h_0=None):
        """Run the RNN over a batch-first sequence.

        Args:
            input: tensor of shape [batch_size, seq_len, input_size].
            h_0: optional initial hidden state. Shape [1, batch_size, hidden_size]
                (shared by both directions when bidirectional) or
                [2, batch_size, hidden_size] (index 0 = forward, index 1 =
                backward). Defaults to zeros.

        Returns:
            (output_seq, h_n) where output_seq is
            [batch_size, seq_len, hidden_size * num_directions] and h_n is
            [num_directions, batch_size, hidden_size]. When bidirectional,
            h_n[0] is the forward state after the last time step and h_n[1]
            is the backward state after the first time step; in output_seq the
            first hidden_size features belong to the forward direction and the
            remaining ones to the backward direction.
        """
        self.input = input
        # The sequence length always comes from the input itself, never from a
        # notebook-level global of the same name.
        batch_size, self.seq_len, _ = input.size()

        if h_0 is None:
            h_0 = torch.zeros(1, batch_size, self.hidden_size,
                              dtype=input.dtype, device=input.device)
        self.h_0 = h_0
        # Tolerate a plain [batch_size, hidden_size] initial state as well.
        h_init = h_0 if h_0.dim() == 3 else h_0.unsqueeze(0)

        # Forward direction: t = 0 .. seq_len - 1.
        h = h_init[0]  # [batch_size, hidden_size]
        outputs = []
        for t in range(self.seq_len):
            h = self._step(input[:, t], h, self.W_in, self.W_h, self.b_in, self.b_h)
            outputs.append(h)
        self.output_seq = torch.stack(outputs, dim=1)  # [batch_size, seq_len, hidden_size]

        if not self.bidirectional:
            return self.output_seq, h.unsqueeze(0)

        # Backward direction: t = seq_len - 1 .. 0. The recurrence must carry
        # h_backward (not the forward state h) from one step to the next.
        h_backward = h_init[-1]  # same tensor as h_init[0] when only one state was given
        outputs_backward = []
        for t in reversed(range(self.seq_len)):
            h_backward = self._step(input[:, t], h_backward,
                                    self.W_in_backward, self.W_h_backward,
                                    self.b_in_backward, self.b_h_backward)
            outputs_backward.append(h_backward)
        # Collected as [T, T-1, ..., 1]; restore chronological order [1, 2, ..., T].
        outputs_backward.reverse()
        self.output_seq_backward = torch.stack(outputs_backward, dim=1)  # [batch_size, seq_len, hidden_size]

        # [batch_size, seq_len, hidden_size * 2]
        self.output_seq = torch.cat((self.output_seq, self.output_seq_backward), dim=2)
        h_n = torch.stack((h, h_backward), dim=0)  # [2, batch_size, hidden_size]
        return self.output_seq, h_n

In [3]:
# Smoke test: run a bidirectional MyRNN on random input and print the shapes
# of the output sequence and of the final hidden states.
input_size, hidden_size = 10, 3
batch_size, seq_len = 8, 5

input_tensor = torch.randn((batch_size, seq_len, input_size))
rnn = MyRNN(input_size=input_size, hidden_size=hidden_size, bidirectional=True)
output_seq, h_n = rnn.forward(input_tensor)
print(output_seq.size(), h_n.size())

torch.Size([8, 5, 6]) torch.Size([2, 8, 3])


In [4]:
 # Forward direction at the last time step (t = T): h_n[0] is the forward pass's final hidden state.
h_n[0]

tensor([[ 0.6442, -0.0271, -0.9702],
        [ 0.1636, -0.4272,  0.9615],
        [-0.7471,  0.1447,  0.4163],
        [-0.5452, -0.2141,  0.9699],
        [ 0.7476,  0.8307,  0.5552],
        [ 0.0064, -0.2136,  0.2578],
        [ 0.6008,  0.3157,  0.2578],
        [ 0.8442, -0.3519, -0.8461]], grad_fn=<SelectBackward0>)

In [5]:
 # Forward direction at t = T read from output_seq: first hidden_size features of the last time step.
output_seq[:, -1, :hidden_size] 

tensor([[ 0.6442, -0.0271, -0.9702],
        [ 0.1636, -0.4272,  0.9615],
        [-0.7471,  0.1447,  0.4163],
        [-0.5452, -0.2141,  0.9699],
        [ 0.7476,  0.8307,  0.5552],
        [ 0.0064, -0.2136,  0.2578],
        [ 0.6008,  0.3157,  0.2578],
        [ 0.8442, -0.3519, -0.8461]], grad_fn=<SliceBackward0>)

In [6]:
# t = T: element-wise check that the forward half of the last output step equals h_n[0].
output_seq[:, -1, :hidden_size] == h_n[0] 

tensor([[True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True]])

In [7]:
# Backward direction at the first time step (t = 1): h_n[1] is the backward pass's final hidden state.
h_n[1]

tensor([[ 0.0026, -0.1756,  0.3559],
        [-0.2078, -0.6433,  0.5251],
        [ 0.6611,  0.1191, -0.4573],
        [ 0.9385, -0.9939,  0.9230],
        [ 0.1584, -0.8523,  0.5501],
        [ 0.8973, -0.7616,  0.4186],
        [-0.7024, -0.0830,  0.3416],
        [ 0.3912,  0.6610,  0.9656]], grad_fn=<SelectBackward0>)

In [8]:
# Backward direction at t = 1 read from output_seq: features after the first hidden_size of the first time step.
output_seq[:, 0, hidden_size:] 

tensor([[ 0.0026, -0.1756,  0.3559],
        [-0.2078, -0.6433,  0.5251],
        [ 0.6611,  0.1191, -0.4573],
        [ 0.9385, -0.9939,  0.9230],
        [ 0.1584, -0.8523,  0.5501],
        [ 0.8973, -0.7616,  0.4186],
        [-0.7024, -0.0830,  0.3416],
        [ 0.3912,  0.6610,  0.9656]], grad_fn=<SliceBackward0>)

In [9]:
# t = 1: element-wise check that the backward half of the first output step equals h_n[1].
output_seq[:, 0, hidden_size:] == h_n[1]

tensor([[True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True],
        [True, True, True]])

### 要素を逆順にする

In [10]:
# A slice with step -1 yields a reversed copy; list1 itself is left unchanged.
list1 = list(range(1, 6))
list1[::-1]

[5, 4, 3, 2, 1]

### PyTorchのBidirectional RNN

In [30]:
class Model(nn.Module):
    """Recurrent layer followed by a linear head applied to the last time step.

    Args:
        input_size: number of features of every input time step.
        hidden_size: hidden state size of the recurrent layer.
        output_size: number of features produced by the linear head.
        rnn_type: one of 'RNN', 'GRU' or 'LSTM'.
        bidirectional: whether the recurrent layer is bidirectional.
        print_shape: when True, forward() prints the shapes of the recurrent
            layer's outputs; when False, forward() prints nothing.

    Raises:
        ValueError: if rnn_type is not one of the supported names.
    """

    # Recurrent layer classes selectable through rnn_type.
    _RNN_CLASSES = {'RNN': nn.RNN, 'GRU': nn.GRU, 'LSTM': nn.LSTM}

    def __init__(self, input_size, hidden_size, output_size, rnn_type='LSTM', bidirectional=False, print_shape=False):
        super().__init__()
        self.num_direction = 2 if bidirectional else 1
        self.print_shape = print_shape

        rnn_cls = self._RNN_CLASSES.get(rnn_type)
        if rnn_cls is None:
            raise ValueError(f'Unsupported RNN type: {rnn_type!r}. Choose from ["RNN", "GRU", "LSTM"]')
        self.rnn = rnn_cls(input_size, hidden_size, batch_first=True, bidirectional=bidirectional)

        # A bidirectional layer concatenates both directions on the feature axis.
        self.fc = nn.Linear(hidden_size * self.num_direction, output_size)

    def forward(self, x):
        """Map x [batch_size, seq_len, input_size] to [batch_size, output_size]."""
        output_seq, state = self.rnn(x)
        # nn.LSTM returns the state as (h_n, c_n); nn.RNN and nn.GRU return h_n
        # directly. Indexing with [0] is therefore only valid for the LSTM tuple.
        h_n = state[0] if isinstance(state, tuple) else state

        # Shape diagnostics, emitted only on request.
        if self.print_shape:
            print(f"size of output_seq : {output_seq.shape}")
            print(f"size of h_n : {h_n.shape}")
            print(output_seq.shape)

        # output_seq : [batch_size, seq_len, hidden_size*num_direction]
        # NOTE(review): for a bidirectional layer the backward half of the last
        # time step has only seen the final input element; the backward
        # direction's summary of the whole sequence lives in h_n[-1].
        last_step = output_seq[:, -1, :]  # [batch_size, hidden_size*num_direction]

        return self.fc(last_step)

In [31]:
# Smoke test: push random input through a bidirectional Model and print the
# shape of its output.
input_size, hidden_size, output_size = 10, 3, 3
batch_size, seq_len = 8, 5

input_tensor = torch.randn((batch_size, seq_len, input_size))
rnn = Model(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    bidirectional=True,
    print_shape=True,
)
output = rnn(input_tensor)
print(output.size())

size of output_seq : torch.Size([8, 5, 6])
size of h_n : torch.Size([2, 8, 3])
torch.Size([8, 5, 6])
torch.Size([8, 3])


### Deep RNN

In [32]:
# Layer depths to compare in the stacked (deep) RNN experiment.
num_layers_list = list(range(1, 4))

In [38]:
# For each depth, build a bidirectional nn.RNN and report the shapes of its
# output sequence and of its final hidden states.
for num_layer in num_layers_list:
    rnn = nn.RNN(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layer,
        batch_first=True,
        bidirectional=True,
    )
    output_seq, h_n = rnn(input_tensor)
    print(
        "",
        f"Num Layers{num_layer}",
        f"Output shape {output_seq.shape}",
        f"Last Hidden State Shape : {h_n.shape}",
        sep="\n",
    )


Num Layers1
Output shape torch.Size([8, 5, 6])
Last Hidden State Shape : torch.Size([2, 8, 3])

Num Layers2
Output shape torch.Size([8, 5, 6])
Last Hidden State Shape : torch.Size([4, 8, 3])

Num Layers3
Output shape torch.Size([8, 5, 6])
Last Hidden State Shape : torch.Size([6, 8, 3])


In [39]:
class Model(nn.Module):
    """Stacked recurrent layers followed by a linear head on the last time step.

    Args:
        input_size: number of features of every input time step.
        hidden_size: hidden state size of each recurrent layer.
        output_size: number of features produced by the linear head.
        num_layers: number of stacked recurrent layers.
        rnn_type: one of 'RNN', 'GRU' or 'LSTM'.
        bidirectional: whether the recurrent layers are bidirectional.
        print_shape: when True, forward() prints the shapes of the recurrent
            layer's outputs; when False, forward() prints nothing.

    Raises:
        ValueError: if rnn_type is not one of the supported names.
    """

    # Recurrent layer classes selectable through rnn_type.
    _RNN_CLASSES = {'RNN': nn.RNN, 'GRU': nn.GRU, 'LSTM': nn.LSTM}

    def __init__(self, input_size, hidden_size, output_size, num_layers=1, rnn_type='LSTM', bidirectional=False, print_shape=False):
        super().__init__()
        self.num_direction = 2 if bidirectional else 1
        self.print_shape = print_shape

        rnn_cls = self._RNN_CLASSES.get(rnn_type)
        if rnn_cls is None:
            raise ValueError(f'Unsupported RNN type: {rnn_type!r}. Choose from ["RNN", "GRU", "LSTM"]')
        self.rnn = rnn_cls(input_size, hidden_size, num_layers, batch_first=True, bidirectional=bidirectional)

        # Only the top layer's features reach the head, so its width does not
        # depend on num_layers - only on the number of directions.
        self.fc = nn.Linear(hidden_size * self.num_direction, output_size)

    def forward(self, x):
        """Map x [batch_size, seq_len, input_size] to [batch_size, output_size]."""
        output_seq, state = self.rnn(x)
        # nn.LSTM returns the state as (h_n, c_n); nn.RNN and nn.GRU return h_n
        # directly. Indexing with [0] is therefore only valid for the LSTM tuple.
        h_n = state[0] if isinstance(state, tuple) else state

        # Shape diagnostics, emitted only on request.
        if self.print_shape:
            print(f"size of output_seq : {output_seq.shape}")
            print(f"size of h_n : {h_n.shape}")
            print(output_seq.shape)

        # output_seq : [batch_size, seq_len, hidden_size*num_direction]
        # NOTE(review): for a bidirectional layer the backward half of the last
        # time step has only seen the final input element; the backward
        # direction's summary of the whole sequence lives in h_n[-1].
        last_step = output_seq[:, -1, :]  # [batch_size, hidden_size*num_direction]

        return self.fc(last_step)

In [41]:
# Smoke test: push random input through a two-layer bidirectional Model and
# print the shape of its output.
input_size, hidden_size, output_size = 10, 3, 3
batch_size, seq_len = 8, 5

input_tensor = torch.randn((batch_size, seq_len, input_size))
rnn = Model(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_layers=2,
    bidirectional=True,
    print_shape=True,
)
output = rnn(input_tensor)
print(output.size())

size of output_seq : torch.Size([8, 5, 6])
size of h_n : torch.Size([4, 8, 3])
torch.Size([8, 5, 6])
torch.Size([8, 3])
