# 計算グラフを理解しよう:2層ニューラルネットワークの実装に向けて

In [13]:
import numpy as np

## 加算レイヤと乗算レイヤ，除算レイヤの実装

In [1]:
# 加算レイヤ
class AddLayer:
    """Addition node of a computational graph: ``z = x + y``."""

    def __init__(self):
        # Operands of the most recent forward() call; None until it runs.
        self.x = self.y = None

    def forward(self, x, y):
        """Remember both operands and return their sum."""
        self.x, self.y = x, y
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The local derivative of a sum with respect to either operand is 1,
        so each gradient is the upstream gradient multiplied by 1.
        """
        return dout * 1, dout * 1


In [3]:
# Forward pass through the addition node with x = 10 and y = 20.
x = 10
y = 20
add_layer = AddLayer() # instantiate the layer
z = add_layer.forward(x, y) # forward pass
print(z)

30


In [4]:
# Backward pass through the same add_layer, using an upstream gradient of 1.
dout = 1
dx, dy = add_layer.backward(dout) # backward pass
print(dx, dy)

1 1


In [2]:
# 乗算レイヤ
class MulLayer:
    """Multiplication node of a computational graph: ``z = x * y``."""

    def __init__(self):
        # Operands cached by forward(); backward() reads them because the
        # local derivative with respect to one operand is the other operand.
        self.x = self.y = None

    def forward(self, x, y):
        """Cache both operands and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        ``dx`` is ``dout`` scaled by the cached ``y`` and ``dy`` is ``dout``
        scaled by the cached ``x``.
        """
        return dout * self.y, dout * self.x

In [5]:
# Forward pass through the multiplication node with x = 10 and y = 20.
x = 10
y = 20
mul_layer = MulLayer() # instantiate the layer
z = mul_layer.forward(x, y) # forward pass
print(z)

200


In [6]:
# Backward pass through the same mul_layer, using an upstream gradient of 1.
dout = 1
dx, dy = mul_layer.backward(dout) # backward pass
print(dx, dy)

20 10


In [8]:
class DivLayer:
    """Division node of a computational graph: ``z = x / y``."""

    def __init__(self):
        # Numerator and denominator of the most recent forward() call.
        self.x = self.y = None

    def forward(self, x, y):
        """Cache both operands and return the quotient ``x / y``."""
        self.x, self.y = x, y
        return x / y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        Uses d(x/y)/dx = 1 / y and d(x/y)/dy = -x / y**2 with the operands
        cached by ``forward``.
        """
        numerator, denominator = self.x, self.y
        grad_x = dout / denominator
        local_slope_y = -numerator / denominator ** 2
        return grad_x, dout * local_slope_y

In [9]:
# Forward pass through the division node with x = 10 and y = 20.
x = 10
y = 20
div_layer = DivLayer() # instantiate the layer
z = div_layer.forward(x, y) # forward pass
print(z)

0.5


In [10]:
# Backward pass through the same div_layer, using an upstream gradient of 1.
dout = 1
dx, dy = div_layer.backward(dout) # backward pass
print(dx, dy)

0.05 -0.025


## Sigmoidレイヤの実装
シグモイド関数は以下のように計算できます．
$$
\sigma(x) = \frac{1}{1 + \exp(-x)}
$$

In [7]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The exponential is evaluated with ``np.exp``, so ``x`` may be anything
    that function accepts.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

シグモイド関数の微分は以下のように計算できます．
$$
\frac{\partial \sigma(x)}{\partial x} = \sigma(x)(1 - \sigma(x))
$$

In [11]:
class SigmoidLayer:
    """Sigmoid activation node of a computational graph."""

    def __init__(self):
        # Output of the latest forward() call. backward() is written purely
        # in terms of this value, so the input itself is not stored.
        self.out = None

    def forward(self, x):
        """Apply ``sigmoid`` to ``x``, cache the result and return it."""
        activated = sigmoid(x)
        self.out = activated
        return activated

    def backward(self, dout):
        """Return ``dout`` scaled by the sigmoid derivative ``out * (1 - out)``."""
        cached = self.out
        return dout * cached * (1.0 - cached)

In [14]:
# Forward pass through the sigmoid node with x = 10.
x = 10
sigmoid_layer = SigmoidLayer() # instantiate the layer
y = sigmoid_layer.forward(x) # forward pass
print(y)

0.9999546021312976


In [15]:
# Backward pass through the same sigmoid_layer, using an upstream gradient of 1.
dout = 1
dx = sigmoid_layer.backward(dout) # backward pass
print(dx)

4.5395807735907655e-05


## tanhレイヤの実装
tanh関数は以下のように計算できます．
$$
\tanh(x) = \frac{\exp(x) - \exp(-x)}{\exp(x) + \exp(-x)}
$$


In [23]:
def tanh(x):
    """Return the hyperbolic tangent of ``x`` by delegating to ``np.tanh``."""
    activated = np.tanh(x)
    return activated

tanh関数の微分は以下のように計算できます．
$$
\frac{\partial \tanh(x)}{\partial x} = 1 - \tanh^2(x)
$$


In [24]:
class TanhLayer:
    """Hyperbolic-tangent activation node of a computational graph."""

    def __init__(self):
        # Output of the latest forward() call. backward() only needs this
        # value because the derivative is 1 - tanh(x)**2.
        self.out = None

    def forward(self, x):
        """Apply ``tanh`` to ``x``, cache the result and return it."""
        activated = tanh(x)
        self.out = activated
        return activated

    def backward(self, dout):
        """Return ``dout`` scaled by the tanh derivative ``1 - out ** 2``."""
        local_slope = 1.0 - self.out ** 2
        return dout * local_slope

## MatMulレイヤの実装
行列の積は以下のように計算できます．
$$
\vec{y} = \vec{x} \cdot W 
$$

行列の積の微分は以下のように計算できます．
$$
\frac{\partial \vec{y}}{\partial \vec{x}} = W^T
$$

$W$での微分は以下のように計算できます．
$$
\frac{\partial \vec{y}}{\partial W} = \vec{x}^T
$$

$x,y$がベクトルではなく，行列でも同様で
$$
y = x \cdot W
$$

$$
\frac{\partial y}{\partial x} = W^T
$$

$$
\frac{\partial y}{\partial W} = x^T
$$



In [25]:
class MatMul:
    """Matrix-product node of a computational graph: ``out = x . W``."""

    def __init__(self, W):
        self.W = W      # weight operand supplied by the caller
        self.x = None   # input seen by the latest forward() call
        self.dW = None  # gradient with respect to W, set by backward()

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W)``."""
        self.x = x
        return np.dot(x, self.W)

    def backward(self, dout):
        """Return the gradient with respect to the input and store ``dW``.

        The input gradient is ``dout . W^T``; the weight gradient
        ``x^T . dout`` is kept on ``self.dW`` rather than returned.
        """
        weights, inputs = self.W, self.x
        grad_x = np.dot(dout, weights.T)
        self.dW = np.dot(inputs.T, dout)
        return grad_x

In [39]:
# Forward pass through MatMul: a 2x2 input times a 2x3 weight matrix.
x = np.array([[1, 2],
               [3, 4]])
W = np.array([[1, 2, 3], 
              [4, 5, 6]])
matmul_layer = MatMul(W) # instantiate the layer
y = matmul_layer.forward(x) # forward pass
print(y)

[[ 9 12 15]
 [19 26 33]]


In [40]:
# Backward pass through the same matmul_layer; dout has the shape of the
# forward output (2x3) and the printed dx is dout . W^T.
dout = np.array([[1, 2, 3], 
                 [4, 5, 6]])
dx = matmul_layer.backward(dout) # backward pass
print(dx)


[[14 32]
 [32 77]]


## RNNレイヤの実装
RNNレイヤの順伝播は以下のように計算できます．
\begin{align*}
\mathbf{h}_t &= \tanh(\mathbf{h}_{t-1} \mathbf{W}_h + \mathbf{x}_t \mathbf{W}_x + \mathbf{b})
\end{align*}

RNNレイヤの逆伝播は以下のように計算できます．$\tanh$ の微分 $1 - \tanh^2$ を通した勾配を $\boldsymbol{\delta}_t$ とおきます（$\odot$ は要素ごとの積）．
\begin{align*}
\boldsymbol{\delta}_t &= \frac{\partial L}{\partial \mathbf{h}_t} \odot \left(1 - \mathbf{h}_t^2\right) \\
\frac{\partial L}{\partial \mathbf{h}_{t-1}} &= \boldsymbol{\delta}_t \mathbf{W}_h^T \\
\frac{\partial L}{\partial \mathbf{W}_h} &= \mathbf{h}_{t-1}^T \boldsymbol{\delta}_t \\
\frac{\partial L}{\partial \mathbf{x}_t} &= \boldsymbol{\delta}_t \mathbf{W}_x^T \\
\frac{\partial L}{\partial \mathbf{W}_x} &= \mathbf{x}_t^T \boldsymbol{\delta}_t \\
\frac{\partial L}{\partial \mathbf{b}} &= \sum_{n} \boldsymbol{\delta}_t^{(n)}
\end{align*}
ここで $\boldsymbol{\delta}_t^{(n)}$ は $\boldsymbol{\delta}_t$ の第 $n$ 行です．パラメータを全時刻で共有する場合は，これらのパラメータ勾配をさらに $t = 1, \dots, T$ について合計します．


In [41]:
class RNN:
    """RNN layer for one step: ``h_next = tanh(h_prev . Wh + x . Wx + b)``.

    Attributes:
        params: ``[Wx, Wh, b]`` exactly as passed to the constructor.
        grads: Gradient buffers matching ``params`` in order and shape.
            ``backward`` overwrites their contents in place.
        cache: ``(x, h_prev, h_next)`` from the latest ``forward`` call,
            or ``None`` before the first call.
    """

    def __init__(self, Wx, Wh, b):
        self.params = [Wx, Wh, b]
        # Gradient buffers (not the parameters themselves), one per parameter.
        self.grads = [self._zero_grad_like(p) for p in self.params]
        self.cache = None

    @staticmethod
    def _zero_grad_like(param):
        """Return a zero buffer shaped like ``param`` with an inexact dtype.

        ``backward`` writes tanh-derived (floating-point) gradients into the
        buffer with ``buf[...] = value``. An integer buffer would silently
        truncate them, so non-inexact parameter dtypes get a float64 buffer
        while floating/complex dtypes are kept as they are.
        """
        dtype = np.asarray(param).dtype
        if not np.issubdtype(dtype, np.inexact):
            dtype = np.float64
        return np.zeros_like(param, dtype=dtype)

    def forward(self, x, h_prev):
        """Compute the next hidden state and cache what ``backward`` needs.

        Args:
            x: Input for this step.
            h_prev: Hidden state from the previous step.

        Returns:
            ``tanh(h_prev . Wh + x . Wx + b)``.
        """
        # NOTE(review): backward() takes .T of x and h_prev and sums over
        # axis 0, which suggests 2-D row-per-sample arrays -- confirm callers.
        Wx, Wh, b = self.params
        t = np.dot(h_prev, Wh) + np.dot(x, Wx) + b
        h_next = np.tanh(t)

        self.cache = (x, h_prev, h_next)
        return h_next

    def backward(self, dh_next):
        """Backpropagate ``dh_next`` through the step computed by ``forward``.

        Parameter gradients are written into ``self.grads`` in the order
        ``[dWx, dWh, db]``.

        Args:
            dh_next: Gradient with respect to the value returned by
                ``forward``.

        Returns:
            ``(dx, dh_prev)``: gradients with respect to the ``x`` and
            ``h_prev`` arguments of the cached ``forward`` call.
        """
        Wx, Wh, _ = self.params  # the bias value is not needed for gradients
        x, h_prev, h_next = self.cache

        # Gradient at the tanh input: d tanh(t)/dt = 1 - tanh(t)**2.
        dt = dh_next * (1 - h_next ** 2)
        db = np.sum(dt, axis=0)
        dWh = np.dot(h_prev.T, dt)
        dh_prev = np.dot(dt, Wh.T)
        dWx = np.dot(x.T, dt)
        dx = np.dot(dt, Wx.T)

        # Assign through [...] so the existing buffer objects are updated
        # in place instead of being replaced by new arrays.
        self.grads[0][...] = dWx
        self.grads[1][...] = dWh
        self.grads[2][...] = db

        return dx, dh_prev