In [1]:
import numpy as np

## 深層学習スクラッチ　リカレントニューラルネットワーク

【問題1】SimpleRNNのフォワードプロパゲーション実装

In [2]:
class SimpleInitializer:
    """Parameter initializer: Gaussian weights and all-ones biases.

    Parameters
    ----------
    sigma : float
        Scale applied to standard-normal samples when creating weights.
    """

    def __init__(self, sigma):
        self.sigma = sigma

    def W(self, *shape):
        """Return an array of the given dimensions sampled as sigma * N(0, 1).

        Parameters
        ----------
        *shape : int
            Dimensions of the array, passed as separate integers.
        """
        return self.sigma * np.random.randn(*shape)

    def B(self, *shape):
        """Return an array of ones with the given dimensions.

        Parameters
        ----------
        *shape : int or tuple of int
            Either separate integers (``B(2, 3)``) or a single int / tuple
            (``B(4)``, ``B((2, 3))``).
        """
        # np.ones expects the whole shape as its FIRST argument; unpacking
        # several dimensions would hand the second one to `dtype` and raise.
        dims = shape[0] if len(shape) == 1 else shape
        return np.ones(dims)

In [3]:
class SimpleRNN:
    """Simple recurrent layer with tanh activation (forward pass only).

    Parameters
    ----------
    n_nodes : int
        Number of hidden units.
    initializer : object, optional
        Object exposing ``W(*shape)`` and ``B(*shape)``; used to create any
        parameter that is still ``None`` when ``forward`` runs. When omitted,
        ``SimpleInitializer(0.01)`` is created for this instance.
    optimizer : object, optional
        Stored on the instance; not used by the forward pass.
    """

    def __init__(self, n_nodes, initializer=None, optimizer=None):
        self.n_nodes = n_nodes
        self.optimizer = optimizer
        # Resolve the default here so each instance gets its own initializer
        # instead of one object built when the class body was executed.
        self.initializer = SimpleInitializer(0.01) if initializer is None else initializer
        # Parameters; may be assigned from outside before calling forward().
        self.wx = None  # (n_features, n_nodes)
        self.wh = None  # (n_nodes, n_nodes)
        self.b = None   # (n_nodes,)
        # Values cached by the most recent forward() call.
        self.X = None
        self.batch_size = None
        self.n_sequences = None
        self.n_features = None
        self.a = None  # pre-activations, (batch_size, n_sequences, n_nodes)
        self.h = None  # hidden states,   (batch_size, n_sequences, n_nodes)

    def forward(self, X):
        """Run the recurrence over every time step, starting from a zero state.

        Parameters
        ----------
        X : ndarray, shape (batch_size, n_sequences, n_features)
            Input sequences.

        Returns
        -------
        ndarray, shape (batch_size, n_nodes)
            Hidden state at the last time step.
        """
        self.X = X
        self.batch_size, self.n_sequences, self.n_features = X.shape

        # Create parameters only when they are missing, so values assigned
        # from outside (e.g. fixed weights for an experiment) are preserved.
        if self.wx is None:
            self.wx = self.initializer.W(self.n_features, self.n_nodes)
        if self.wh is None:
            self.wh = self.initializer.W(self.n_nodes, self.n_nodes)
        if self.b is None:
            self.b = self.initializer.B(self.n_nodes)

        self.a = np.zeros((self.batch_size, self.n_sequences, self.n_nodes))
        self.h = np.zeros((self.batch_size, self.n_sequences, self.n_nodes))

        # Explicit initial state; avoids relying on index -1 wrapping around
        # to a not-yet-written slot of self.h at t == 0.
        h_prev = np.zeros((self.batch_size, self.n_nodes))
        for t in range(self.n_sequences):
            self.a[:, t] = self.X[:, t] @ self.wx + h_prev @ self.wh + self.b
            self.h[:, t] = np.tanh(self.a[:, t])
            h_prev = self.h[:, t]

        return self.h[:, -1]

【問題2】小さな配列でのフォワードプロパゲーションの実験

In [4]:
# Fixed inputs and parameters for the small forward-propagation experiment.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]) / 100  # (n_nodes, n_nodes)
batch_size, n_sequences, n_features = x.shape  # 1, 3, 2
n_nodes = w_x.shape[1]  # 4
b = np.array([1, 1, 1, 1])  # (n_nodes,)
# Reference value for the forward output, shape (batch_size, n_nodes).
# (The zero array that used to be assigned to `h` first was overwritten
# immediately and never read, so it has been dropped.)
h = np.array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

In [5]:
# Create the layer with the hidden size used in this experiment.
rnn = SimpleRNN(n_nodes=n_nodes)
# Overwrite the parameters with the fixed values prepared for the problem.
rnn.wx, rnn.wh, rnn.b = w_x, w_h, b
# Forward propagation; `hh` is the hidden state at the last time step.
hh = rnn.forward(x)
# Check against the reference value `h` programmatically rather than by
# comparing two printed arrays, so a re-run fails loudly on regression.
np.testing.assert_allclose(hh, h, atol=1e-8)

In [6]:
# Display the reference hidden state assigned to `h` in the setup cell.
h

array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

In [7]:
# Display the value returned by rnn.forward(x) for comparison with `h`.
hh

array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])