In [1]:
import numpy as np
np.random.seed(1)

# 2. 線形変換

In [2]:
class Optimizer:
    """Mixin providing a plain gradient-descent update for ``w`` and ``b``."""

    def step(self, lr):
        """Subtract ``lr`` times each stored gradient from its parameter.

        Reads ``self.w``/``self.dw`` and ``self.b``/``self.db``; these
        attributes are not created here and must already exist on the
        instance.

        Args:
            lr: Learning rate that multiplies each gradient.
        """
        for param_name, grad_name in (("w", "dw"), ("b", "db")):
            param = getattr(self, param_name)
            # Augmented assignment: NumPy arrays are updated in place.
            param -= lr * getattr(self, grad_name)
            setattr(self, param_name, param)

class Linear(Optimizer):
    """Affine layer computing ``y = x . w + b`` with cached gradients.

    ``forward`` stores its input and output; ``backward`` stores ``dw``,
    ``db`` and ``dx`` on the instance so the inherited ``step`` can use them.
    """

    def __init__(self, x_n, y_n):
        """Allocate the weight matrix and bias vector.

        Args:
            x_n: Number of input features (rows of ``w``).
            y_n: Number of output features (columns of ``w``, length of ``b``).
        """
        # Standard-normal draws scaled by sqrt(2 / x_n) (He-initialisation scale).
        draws = np.random.randn(x_n, y_n)
        self.w = draws * np.sqrt(2 / x_n)
        # Bias starts at zero.
        self.b = np.zeros(y_n)

    def forward(self, x):
        """Apply the layer to ``x`` and return the result.

        The input is kept on ``self.x`` because ``backward`` needs it to
        form the weight gradient.
        """
        self.x = x
        projected = np.dot(x, self.w)
        self.y = projected + self.b
        return self.y

    def backward(self, dy):
        """Compute gradients from the upstream gradient ``dy``.

        Sets ``self.dw`` (input transposed times ``dy``), ``self.db``
        (``dy`` summed over axis 0) and ``self.dx`` (``dy`` times the
        transposed weights), and returns ``self.dx``.
        """
        inputs_t = self.x.T
        self.dw = np.dot(inputs_t, dy)
        self.db = np.sum(dy, axis=0)
        weights_t = self.w.T
        self.dx = np.dot(dy, weights_t)
        return self.dx

## 2.1 順伝播

In [3]:
# 5 samples with 10 features each, drawn from the standard normal via NumPy's global RNG.
x = np.random.randn(5,10)
x

array([[ 1.62434536, -0.61175641, -0.52817175, -1.07296862,  0.86540763,
        -2.3015387 ,  1.74481176, -0.7612069 ,  0.3190391 , -0.24937038],
       [ 1.46210794, -2.06014071, -0.3224172 , -0.38405435,  1.13376944,
        -1.09989127, -0.17242821, -0.87785842,  0.04221375,  0.58281521],
       [-1.10061918,  1.14472371,  0.90159072,  0.50249434,  0.90085595,
        -0.68372786, -0.12289023, -0.93576943, -0.26788808,  0.53035547],
       [-0.69166075, -0.39675353, -0.6871727 , -0.84520564, -0.67124613,
        -0.0126646 , -1.11731035,  0.2344157 ,  1.65980218,  0.74204416],
       [-0.19183555, -0.88762896, -0.74715829,  1.6924546 ,  0.05080775,
        -0.63699565,  0.19091548,  2.10025514,  0.12015895,  0.61720311]])

In [4]:
# Layer mapping 10 input features to 3 outputs; y1 keeps the forward result for later cells.
model = Linear(10, 3)
y1 = model.forward(x)
y1

array([[ 1.7968638 , -2.79515866, -4.58812462],
       [ 0.79365722, -0.68367494, -2.08356183],
       [ 0.68992279,  0.11283528,  0.9215842 ],
       [-1.41272376, -0.48200954,  0.37175357],
       [ 0.4940006 , -1.11218587,  1.18968491]])

## 2.2 逆伝播

In [5]:
# Backpropagate an all-ones upstream gradient shaped like y1; the returned dx has x's shape.
model.backward(np.ones((5,3))).shape

(5, 10)

In [6]:
# dw = x.T . dy; with dy all ones, row i repeats the sum of column i of x.
model.dw

array([[ 1.10233782,  1.10233782,  1.10233782],
       [-2.8115559 , -2.8115559 , -2.8115559 ],
       [-1.38332923, -1.38332923, -1.38332923],
       [-0.10727968, -0.10727968, -0.10727968],
       [ 2.27959464,  2.27959464,  2.27959464],
       [-4.73481807, -4.73481807, -4.73481807],
       [ 0.52309847,  0.52309847,  0.52309847],
       [-0.24016392, -0.24016392, -0.24016392],
       [ 1.87332589,  1.87332589,  1.87332589],
       [ 2.22304758,  2.22304758,  2.22304758]])

In [7]:
# db sums dy over axis 0, so an all-ones dy with 5 rows gives 5 per output.
model.db

array([5., 5., 5.])

# 3. 活性化関数(ReLU)

In [8]:
class Relu:
    """Element-wise rectifier: entries <= 0 become 0, the rest pass through."""

    def forward(self, x):
        """Return ``x`` with every entry that is <= 0 replaced by 0.

        The input is cached on ``self.x`` so ``backward`` can rebuild the
        same mask.
        """
        self.x = x
        blocked = self.x <= 0
        return np.where(blocked, 0, self.x)

    def backward(self, dy):
        """Scale the upstream gradient ``dy`` by the 0/1 gate of the cached input.

        The gate is 0 where the cached input was <= 0 and 1 elsewhere.
        """
        blocked = self.x <= 0
        gate = np.where(blocked, 0, 1)
        return dy * gate

## 3.1 順伝播

In [9]:
# Linear-layer output, shown again as the input to ReLU.
y1

array([[ 1.7968638 , -2.79515866, -4.58812462],
       [ 0.79365722, -0.68367494, -2.08356183],
       [ 0.68992279,  0.11283528,  0.9215842 ],
       [-1.41272376, -0.48200954,  0.37175357],
       [ 0.4940006 , -1.11218587,  1.18968491]])

In [10]:
# Relu defines no __init__, so it is constructed without arguments.
relu = Relu()

In [11]:
# Entries of y1 that are <= 0 are replaced by 0; positive entries pass through.
y2 = relu.forward(y1)
y2

array([[1.7968638 , 0.        , 0.        ],
       [0.79365722, 0.        , 0.        ],
       [0.68992279, 0.11283528, 0.9215842 ],
       [0.        , 0.        , 0.37175357],
       [0.4940006 , 0.        , 1.18968491]])

## 3.2 逆伝播

In [12]:
# A scalar upstream gradient of 1 broadcasts, so the result is the 0/1 gate itself.
relu.backward(1)

array([[1, 0, 0],
       [1, 0, 0],
       [1, 1, 1],
       [0, 0, 1],
       [1, 0, 1]])

# 4. Softmax関数とCrossEntropyLoss

In [13]:
class CELoss:
    """Softmax followed by a batch-averaged cross-entropy loss.

    ``forward`` caches the softmax probabilities (``self.y``) and the
    targets (``self.t``) so ``backward`` can form the gradient without
    recomputing them.
    """

    def forward(self, x, t):
        """Return the cross-entropy between ``softmax(x)`` and ``t``.

        Args:
            x: Logits; softmax is taken along axis 1, so a 2-D array with
                one row per sample is expected.
            t: Targets multiplied element-wise with the log-probabilities
                (same shape as ``x``).

        Returns:
            The summed cross-entropy divided by the number of rows.
        """
        self.t = t
        # Softmax is unchanged by subtracting a per-row constant. Removing the
        # row maximum keeps every exponent <= 0, so np.exp cannot overflow to
        # inf (which would turn the division into inf/inf = nan).
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_shifted = np.exp(shifted)
        self.y = exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)  # softmax
        # The 1e-7 offset avoids log(0) when a probability underflows to zero.
        L = -np.sum(t * np.log(self.y + 1e-7)) / len(self.y)
        return L

    def backward(self):
        """Return ``softmax(x) - t`` from the values cached by ``forward``.

        NOTE(review): ``forward`` divides the loss by the number of rows but
        this gradient is not divided by it, so it is the gradient of the
        summed loss. Left unchanged because callers may have tuned their
        learning rate to this scale; confirm before changing.
        """
        dx = self.y - self.t
        return dx

## 4.1 順伝播

In [14]:
# ReLU output, shown again as the logits passed to the loss.
y2

array([[1.7968638 , 0.        , 0.        ],
       [0.79365722, 0.        , 0.        ],
       [0.68992279, 0.11283528, 0.9215842 ],
       [0.        , 0.        , 0.37175357],
       [0.4940006 , 0.        , 1.18968491]])

In [15]:
# One-hot targets: row i gets a 1 in the column listed for it (classes 0, 0, 2, 2, 0).
t = np.zeros((5,3))
t[[0, 1, 2, 3, 4], [0, 0, 2, 2, 0]] = 1
t

array([[1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.]])

In [16]:
# CELoss defines no __init__; its cached values are set when forward() runs.
loss = CELoss()

In [17]:
# Cross-entropy of softmax(y2) against the targets t, averaged over the 5 rows.
loss.forward(y2,t)

0.7776580427782468

## 4.2 逆伝播

In [18]:
# softmax(y2) - t, computed from the values cached by the preceding forward() call.
loss.backward()

array([[-0.24904416,  0.12452208,  0.12452208],
       [-0.47489362,  0.23744681,  0.23744681],
       [ 0.35433046,  0.19896769, -0.55329815],
       [ 0.28983192,  0.28983192, -0.57966385],
       [-0.72339481,  0.16877907,  0.55461573]])