In [33]:
import numpy as np
from common.functions import softmax, cross_entropy_error

In [34]:
# 乗算レイヤの実装
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass caches both operands because the backward pass needs
    them: the gradient with respect to one input is the upstream gradient
    multiplied by the *other* input.
    """

    def __init__(self):
        # Operands of the most recent forward call (None until forward runs).
        self.x, self.y = None, None

    def forward(self, x, y):
        """Cache both operands and return their product ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The cached operands are swapped: d(x*y)/dx = y and d(x*y)/dy = x.
        """
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y

In [35]:
apple = 100
apple_num = 2
tax = 1.1

# Layers: one multiplication node per product in the graph.
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print('apple price:', apple_price)


# Backward pass: propagate d(price)/d(price) = 1 through the layers in
# the reverse order of the forward pass.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

# Every field uses the same "name:value" format (the dtax field was
# previously missing its colon).
print(f"dapple:{dapple}\ndapple_num:{dapple_num}\ndtax:{dtax}")

apple price: 200
dapple:2.2
dapple_num:110.00000000000001
dtax200


In [19]:
# 加算レイヤの実装

class AddLayer:
    """Addition node of a computational graph.

    The node is stateless: the local derivative of ``x + y`` is 1 for both
    inputs, so nothing from the forward pass has to be remembered.
    """

    def __init__(self):
        """Create the layer; there is no state to initialise."""

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        return x + y

    def backward(self, dout):
        """Pass the upstream gradient ``dout`` to both inputs as ``(dx, dy)``."""
        return dout * 1, dout * 1

In [27]:
# りんご2個とみかん3個の買い物

apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

# Layers: one node per operation in the graph.
mul_apple_layer = MulLayer()
mul_orage_layter = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()


# Forward pass: (apple * apple_num + orange * orange_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orage_layter.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

# Backward pass: visit the layers in the reverse order of the forward pass.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
# The apple-side gradient must be bound to `dapple_price` here; it was
# previously unpacked into a misspelled name, so the line that consumes it
# silently read a stale `dapple_price` left over from an earlier cell.
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
dorange, dorange_num = mul_orage_layter.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(price)
print(dapple_num, dapple, dorange, dorange_num, dtax)

715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


## Reluレイヤの実装

In [28]:
class Relu:
    """ReLU activation layer: ``max(x, 0)`` element-wise.

    The forward pass records which elements were non-positive so that the
    backward pass can zero the gradient at exactly those positions.
    Inputs are expected to be NumPy arrays (``.copy()`` and boolean-mask
    indexing are used).
    """

    def __init__(self):
        # Boolean array, True where the last forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0.

        ``x`` itself is left untouched.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0  # assignment, not comparison: clamp non-positive entries

        return out

    def backward(self, dout):
        """Return the gradient with respect to the forward input.

        The local derivative is 0 where the input was <= 0 and 1 elsewhere,
        so the upstream gradient passes through except at masked positions.
        A copy is modified so the caller's ``dout`` array is not mutated.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

## Sigmoidレイヤの実装

In [29]:
class Sigmoid:
    """Sigmoid activation layer: ``1 / (1 + exp(-x))``.

    The forward output is cached because the derivative of the sigmoid can
    be written purely in terms of that output: ``y * (1 - y)``.
    """

    def __init__(self):
        # Output of the most recent forward call (None until forward runs).
        self.out = None

    def forward(self, x):
        """Apply the sigmoid to ``x``, cache the result and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * y * (1 - y)`` where ``y`` is the cached output."""
        complement = 1.0 - self.out
        return dout * self.out * complement
    

In [None]:
## Affineレイヤの実装
class Affine:
    """Fully connected (affine) layer: ``out = x . W + b``.

    Parameters
    ----------
    W : weight matrix used as the right operand of ``np.dot(x, W)``.
    b : bias added to the product.

    After ``backward`` has run, ``dW`` and ``db`` hold the gradients of the
    loss with respect to ``W`` and ``b``.
    """

    def __init__(self, W, b):
        # W and b must be constructor arguments; the layer is built as
        # Affine(W, b) by the network that owns the parameters.
        self.W = W
        self.b = b
        self.x = None   # input of the most recent forward call
        self.dW = None  # gradient w.r.t. W, set by backward
        self.db = None  # gradient w.r.t. b, set by backward

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        out = np.dot(x, self.W) + self.b
        return out

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input.

        ``db`` sums ``dout`` over axis 0 because the same bias is added to
        every row of the forward output.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        return dx

## softmax and cross-entropyレイヤの実装 

In [32]:
class SoftmaxWithLoss:
    """Softmax activation fused with the cross-entropy loss.

    ``forward`` caches the softmax output and the targets so that
    ``backward`` can form the gradient ``(y - t) / batch_size``.
    """

    def __init__(self):
        self.loss = None  # loss value from the most recent forward call
        self.y = None     # softmax output
        self.t = None     # targets; backward assumes the same shape as y
                          # (e.g. one-hot) - TODO confirm against callers

    def forward(self, x, t):
        """Compute, cache and return the cross-entropy loss of softmax(x) vs ``t``."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        # Callers (e.g. a network's loss method) rely on the value being
        # returned, not only stored on the instance.
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the forward input.

        The difference ``y - t`` is divided by the batch size so the result
        is the per-sample gradient, then scaled by the upstream gradient
        ``dout`` (1 when this layer is the end of the graph).
        """
        batch_size = self.t.shape[0]
        dx = dout * (self.y - self.t) / batch_size
        return dx

## 誤差逆伝播法に対応したニューラルネットワークの実装

In [36]:
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

In [109]:
class TwoLyerNet:
    """Two-layer fully connected network: Affine -> ReLU -> Affine -> SoftmaxWithLoss.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``; the Affine layers are constructed from those same
    objects.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Initialise weights with scaled Gaussian noise and biases with zeros."""
        # W1 is drawn before W2 so the random stream is consumed in a fixed order.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params = {
            'W1': w1,
            'b1': np.zeros(hidden_size),
            'W2': w2,
            'b2': np.zeros(output_size),
        }

        # Hidden layers, kept in forward order.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        # The loss layer is kept separate because predict() must not apply it.
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every hidden layer in order and return the result."""
        out = x
        for name in self.layers:
            out = self.layers[name].forward(out)
        return out

    def loss(self, x, t):
        """Return whatever the loss layer's forward yields for ``predict(x)`` and ``t``."""
        scores = self.predict(x)
        return self.lastLayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax prediction equals the label."""
        predicted = np.argmax(self.predict(x), axis=1)

        # Targets that are not 1-D (e.g. one-hot) are reduced to class indices.
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)

        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients for all four parameters via the module-level ``numerical_gradient``."""
        def loss_W(W):
            # The argument is ignored; the loss is evaluated on the network's
            # current parameters.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return gradients for all four parameters via backpropagation."""
        # Forward pass: lets every layer cache what its backward pass needs.
        self.loss(x, t)

        # Backward pass: start at the loss layer, then walk the hidden
        # layers in reverse order.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients the Affine layers stored during backward.
        first_affine = self.layers['Affine1']
        second_affine = self.layers['Affine2']
        return {
            'W1': first_affine.dW,
            'b1': first_affine.db,
            'W2': second_affine.dW,
            'b2': second_affine.db,
        }

## 勾配確認

In [None]:
from hidden_others.mnist import load_mnist

# Load the data; the keyword arguments request normalized inputs and
# one-hot labels.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Build the network: 784 inputs, 50 hidden units, 10 outputs.
network = TwoLyerNet(input_size=784, hidden_size=50, output_size=10)

In [103]:
# バッチ (3データ分)
x_batch = x_train[:3]
t_batch = t_train[:3]

# Compute the gradient both ways. The numerical gradient has to be computed
# in this cell: leaving it commented out makes the loop below depend on a
# variable left over from an earlier kernel session (NameError on a fresh run).
grad_numerial = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

for key in grad_numerial.keys():

    # Mean absolute difference between the two gradients for this parameter,
    # i.e. the average error per element.
    diff = np.average(np.abs(grad_backprop[key] - grad_numerial[key]))
    print(key + ":" + str(diff))
    print("-"*30)

x.shape: (3, 784)
loss: 2.299873359472282
W1:4.114681189028882e-10
------------------------------
b1:2.5338072810428876e-09
------------------------------
W2:6.527093724466705e-09
------------------------------
b2:1.395630669170922e-07
------------------------------


In [100]:
# Display the shape of the current mini-batch.
x_batch.shape

(3, 784)

## 誤差逆伝播法を使った学習法

In [110]:
# データ準備
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Build a fresh network: 784 inputs, 50 hidden units, 10 outputs.
network = TwoLyerNet(input_size=784, hidden_size=50, output_size=10)

In [111]:
# ハイパーパラメータ
iter_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1


train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of mini-batch iterations per epoch. Integer division keeps this an
# int, so the `i % iter_per_epoch == 0` check below still fires once per
# epoch when train_size is not an exact multiple of batch_size (with float
# division the remainder would almost never be exactly 0).
iter_per_epoch = max(train_size // batch_size, 1)


for i in range(iter_num):
    # Draw a random mini-batch of indices.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient with backpropagation.
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent update; `-=` modifies the parameter arrays in place.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch: record and print accuracy on the full train/test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.15451666666666666 0.1542
0.9039 0.9062
0.9234833333333333 0.9239
0.9365333333333333 0.936
0.9443666666666667 0.9416
0.9512666666666667 0.948
0.9571833333333334 0.953
0.9595666666666667 0.9567
0.96515 0.961
0.9678333333333333 0.9629
0.9692833333333334 0.961
0.9716666666666667 0.9649
0.9741 0.9666
0.9752166666666666 0.9681
0.97585 0.968
0.9778833333333333 0.9705
0.9785166666666667 0.9693
