## [Problem 1] Simple Forward propagation implementation of RNN

In [1]:
class Sigmoid:
    def forward(self, A):
        self.A = A
        return self.sigmoid(A)
    def backward(self, dZ):
        _sig = self.sigmoid(self.A)
        return dZ * (1 - _sig)*_sig
    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

class Tanh:
    def forward(self, A):
        self.A = A
        return np.tanh(A)
    def backward(self, dZ):
        return dZ * (1 - (np.tanh(self.A))**2)

class Softmax:
    def forward(self, X):
        self.Z = np.exp(X) / np.sum(np.exp(X), axis=1).reshape(-1,1)
        return self.Z
    def backward(self, Y):
        self.loss = self.loss_func(Y)
        return self.Z - Y
    def loss_func(self, Y, Z=None):
        if Z is None:
            Z = self.Z
        return (-1)*np.average(np.sum(Y*np.log(Z), axis=1))

class ReLU:
    def forward(self, A):
        self.A = A
        return np.clip(A, 0, None)
    def backward(self, dZ):
        return dZ * np.clip(np.sign(self.A), 0, None)

In [2]:
class SimpleInitializer:
    """
    Parameters
    ----------
    sigma : float
    """
    def __init__(self, sigma = 0.01):
        self.sigma = sigma

    def W(self, n_nodes1, n_nodes2):
        W = self.sigma * np.random.randn(n_nodes1, n_nodes2)
        return W

    def B(self, n_nodes2):
        B = self.sigma * np.random.randn(1, n_nodes2)
        return B

class SGD:

    def __init__(self, lr):
        self.lr = lr
    def update(self, layer):

        layer.B = layer.B - self.lr * np.average(layer.dA, axis=0)
        layer.W = layer.W - self.lr * layer.dW / layer.dA.shape[0]

        return layer

In [3]:
class SimpleRNN:

    def __init__(self, W_x, B_x, W_h, initializer, optimizer, activation):
        self.optimizer = optimizer
        # 初期化
        # initializerのメソッドを使い、self.Wとself.Bを初期化する
        #self.W1 = initializer.W(n_wx_nodes1, n_wx_nodes2)
        #self.B1 = initializer.B(1)
        self.Wx = W_x
        self.Bx = B_x
        self.Wh = W_h
        self.dA = 0
        self.dW = 0
        self.W = 0
        self.B = 0
        self.input_X_forward = 0
        self.input_prev_ht_forward = 0
        self.activation = activation
        self.n_sequece = 0

    def forward(self, X):

        self.input_X_forward = X
        self.n_sequece = X.shape[1]
        tmp_prev_h = np.zeros((X.shape[1]+1, X.shape[0], self.Wx.shape[1]))
        self.input_prev_ht_forward = np.zeros((X.shape[0], X.shape[1], self.Wx.shape[1]))
        y = np.zeros((X.shape[0], X.shape[1], self.Wx.shape[1]))
        tmp_y = np.zeros((X.shape[1], X.shape[0], self.Wx.shape[1]))
        for i in range(self.n_sequece):
            Xt = X[:,i]
            #Xt:(batch, Feature)
            tmp = np.dot(Xt, self.Wx) + self.Bx + tmp_prev_h[i]
            #tmp:(batch, Node1)
            tmp_y[i] = self.activation.forward(tmp)
            #h_prev:(batch, node2)
            tmp_prev_h[i+1] = np.dot(tmp_y[i], self.Wh)

        self.input_prev_ht_forward = tmp_prev_h.transpose(1,0,2)
        y = tmp_y.transpose(1,0,2)
        return y

    def backward(self, dA):

        dz = np.zeros_like(self.input_X_forward)
        tmp_dz = dz.transpose(1,0,2)

        loss_h = np.zeros((dA.shape[0], dA.shape[1]+1, dA.shape[2]))
        for i in reversed(range(self.n_sequece)):
            loss = dA[:,i,:] + loss_h[:,i,:]
            loss = self.activation.backward(loss) * loss
            dW = np.dot(self.input_X_forward[:,i,:].T, loss)
            tmp_dz[i] = np.dot(loss, self.Wx.T)
            self.dA = loss
            self.dW = dW
            self.W = self.Wx
            self.B = self.Bx
            self = self.optimizer.update(self)
            self.Wx = self.W
            self.Bx = self.B

            loss_h[:,i+1,:] = np.dot(loss, self.Wh.T)
            self.dA = loss
            dW = np.dot(self.input_prev_ht_forward[:,i,:].T, loss)
            self.dW = dW
            self.W = self.Wh
            self.B = 0
            self = self.optimizer.update(self)
            self.Wh = self.W

        dz = tmp_dz.transpose(1,0,2)
        return dz

## [Problem 2] Experiment of forward propagation with small sequence

In [4]:
import numpy as np 

x = np.array([[[1, 2], [2, 3], [3, 4]]])/100
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]])/100
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]])/100
batch_size = x.shape[0] # 1
n_sequences = x.shape[1] # 3
n_features = x.shape[2] # 2
n_nodes = w_x.shape[1] # 4
h = np.zeros((batch_size, n_nodes))
b = np.array([1])

In [5]:
rnn = SimpleRNN(w_x, 1, w_h, initializer=SimpleInitializer(), optimizer=SGD(0.01), activation=Tanh())

answer = rnn.forward(x)
answer

array([[[0.76188798, 0.76213958, 0.76239095, 0.76255841],
        [0.792209  , 0.8141834 , 0.83404912, 0.84977719],
        [0.79494228, 0.81839002, 0.83939649, 0.85584174]]])

In [6]:
answer[0,2]

array([0.79494228, 0.81839002, 0.83939649, 0.85584174])

## [Problem 3] (Advance assignment) Implementation of backpropagation

In [7]:
dA = np.array([[[0.01, 0.02, 0.03, 0.04], [0.01, 0.02, 0.03, 0.04], [0.01, 0.02, 0.03, 0.04]]])

In [9]:
rnn.backward(dA)

array([[[4.75883037e-05, 6.05642872e-05],
        [4.75883582e-05, 6.05643690e-05],
        [4.75884400e-05, 6.05644781e-05]]])