# CHAPTER 5 오차역전파법

## 5-1 계산 그래프

### 5-1-1 계산 그래프로 풀다

### 5-1-2 국소적 계산

### 5-1-3 왜 계산 그래프로 푸는가?

## 5-2 연쇄 법칙

### 5-2-1 계산 그래프의 역전파

### 5-2-2 연쇄법칙이란?

### 5-2-3 연쇄법칙과 계산 그래프

## 5-3 역전파

### 5-3-1 덧셈 노드의 역전파

### 5-3-2 곱셈 노드의 역전파

### 5-3-3 사과 쇼핑의 예

## 5-4 단순한 계층 구현하기

### 5-4-1 곱셈 계층

In [None]:
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass remembers both operands because the backward pass
    scales the upstream gradient by the *other* operand.
    """

    def __init__(self):
        # Operands of the most recent forward pass (None until forward runs).
        self.x = self.y = None

    def forward(self, x, y):
        """Store both operands and return their product ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The operands are swapped: the gradient for ``x`` is ``dout * y``
        and the gradient for ``y`` is ``dout * x``.
        """
        return dout * self.y, dout * self.x

In [None]:
# buy_apple: forward pass for buying apples with tax applied.
apple, apple_num, tax = 100, 2, 1.1

# Layers: one multiplication for price * count, one for the tax.
mul_apple_layer, mul_tax_layer = MulLayer(), MulLayer()

# Forward propagation.
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)

220.00000000000003


In [None]:
# buy_apple: backward pass.
# Uses mul_tax_layer and mul_apple_layer after their forward calls above,
# because each layer's backward reads the operands stored by forward.
# Backward propagation, in the reverse order of the forward pass.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple , dapple_num , dtax)

2.2 110.00000000000001 200


### 5-4-2 덧셈 계층

In [None]:
class AddLayer:
    """Addition node of a computational graph.

    Addition passes the upstream gradient through unchanged, so the layer
    keeps no state between the forward and backward passes.
    """

    def __init__(self):
        """Nothing to initialise; the layer is stateless."""

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``, each equal to ``dout * 1``."""
        return dout * 1, dout * 1

In [None]:
# buy_apple_orange: forward and backward pass for apples plus oranges with tax.

# Unit prices, quantities and the tax multiplier.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# Layers.
mul_apple_layer, mul_orange_layer = MulLayer(), MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward propagation, steps (1) to (4).
apple_price = mul_apple_layer.forward(apple, apple_num)  # (1)
orange_price = mul_orange_layer.forward(orange, orange_num)  # (2)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)  # (3)
price = mul_tax_layer.forward(all_price, tax)  # (4)

# Backward propagation visits the same layers in reverse, (4) down to (1).
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)  # (4)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)  # (3)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)  # (2)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)  # (1)

print(price)
print(dapple_num, dapple, dorange, dorange_num, dtax)

715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


## 5-5 활성화 함수 계층 구현하기

### 5-5-1 ReLU 계층

In [None]:
class Relu:
    """ReLU activation layer.

    The forward pass records which inputs were non-positive; the backward
    pass blocks the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean mask, True where the last forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element ``<= 0`` set to 0.

        ``x`` must support ``x <= 0``, ``.copy()`` and boolean-mask
        assignment; the input itself is not modified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return the gradient with respect to the forward input.

        Entries selected by the mask from the last forward pass are zeroed.
        The work is done on a copy so the caller's ``dout`` array is left
        untouched (the previous version overwrote it in place).
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx

### 5-5-2 Sigmoid 계층

In [None]:
import numpy as np
class Sigmoid:
    """Sigmoid activation layer.

    The forward output is cached because the derivative of the sigmoid
    can be written purely in terms of that output.
    """

    def __init__(self):
        # Output of the most recent forward pass.
        self.out = None

    def forward(self, x):
        """Compute ``1 / (1 + exp(-x))``, cache it, and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached forward output."""
        y = self.out
        return dout * (1.0 - y) * y

## 5-6 Affine/Softmax 계층 구현하기

### 5-6-1 Affine 계층

### 5-6-2 배치용 Affine 계층

In [None]:
class Affine:
    """Affine (fully connected) layer computing ``x . W + b``.

    Parameters
    ----------
    W : weight matrix, used as the right operand of ``np.dot(x, W)``.
    b : bias, added to every row of the product.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        # Input of the most recent forward pass, needed to compute dW.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        # Add the bias vector. The previous code added the bound method
        # ``self.forward`` here, which raises TypeError at runtime.
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        """Propagate ``dout`` back through the layer.

        Stores the weight gradient in ``self.dW`` and the bias gradient
        (``dout`` summed over axis 0) in ``self.db``, then returns the
        gradient with respect to the forward input.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        return dx
    

In [None]:
%cd /content/drive/MyDrive/AI/독학/밑시딥

/content/drive/MyDrive/AI/독학/밑시딥


In [7]:
import sys, os
sys.path.append('/content/drive/MyDrive/AI/독학/밑시딥/common')
from functions import *

In [None]:
class SoftmaxWithLoss:
    """Softmax activation combined with a cross-entropy loss.

    ``forward`` relies on the ``softmax`` and ``cross_entropy_error``
    helpers that the notebook imports from ``functions``.
    """

    def __init__(self):
        self.loss = None  # loss value from the last forward pass
        self.y = None     # softmax output from the last forward pass
        self.t = None     # target labels from the last forward pass

    def forward(self, x, t):
        """Store ``t`` and ``softmax(x)``, then return the cross-entropy loss."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the forward input.

        The result is ``(y - t) / batch_size`` scaled by the upstream
        gradient ``dout``. ``t`` may be one-hot (same number of elements as
        ``y``) or an array of class indices with one entry per sample
        (assumed to be an integer array, since it is used as an index).
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # One-hot targets: subtract element-wise.
            dx = (self.y - self.t) / batch_size
        else:
            # Class-index targets: subtract 1 only at each sample's label.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        # Apply the upstream gradient; it was previously ignored.
        return dx * dout

## 5-7 오차역전파법 구현하기

### 5-7-1 신경망 학습의 전체 그림

### 5-7-2 오차역전파법을 적용한 신경망 구현하기

In [8]:
import numpy as np
from layers import *
from gradient import numerical_gradient
from collections import OrderedDict


In [36]:
class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, with a softmax loss on top.

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``. The Affine layers are built from those same
    objects, so in-place updates to ``params`` are seen by the layers.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Weights are scaled standard-normal samples; biases start at zero.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

        # Layers in forward order; gradient() walks them in reverse.
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer except the final loss layer."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss for input data ``x`` and target labels ``t``."""
        scores = self.predict(x)
        return self.lastLayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose argmax prediction equals ``t``.

        When ``t`` is not one-dimensional it is reduced with argmax along
        axis 1 before the comparison.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return the gradients obtained from the module-level ``numerical_gradient``.

        ``x`` is the input data and ``t`` the target labels. The result is a
        dict keyed like ``self.params``.
        """
        def loss_W(W):
            # The argument is unused: the loss reads the parameters through
            # the layers, which hold the same objects as self.params.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return the gradients computed by backpropagation.

        Runs one forward pass so every layer caches what it needs, then
        propagates from the loss layer back through the layers in reverse.
        """
        # Forward pass.
        self.loss(x, t)

        # Backward pass, starting from an upstream gradient of 1.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the parameter gradients stored on the Affine layers.
        first, second = self.layers['Affine1'], self.layers['Affine2']
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }

### 5-7-3 오차역전파법으로 구한 기울기 검증하기

In [37]:
import sys, os
sys.path.append('/content/drive/MyDrive/AI/독학/밑시딥/dataset')
import numpy as np
from mnist import load_mnist

# Gradient check: compare backpropagation against numerical differentiation.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Use the first three samples only.
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# For each parameter, print the mean absolute difference between the two gradients.
for key in grad_numerical:
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key + ":" + str(diff))

W1:2.436982556394389e-13
b1:1.0531670511810392e-12
W2:9.464982909847957e-13
b2:1.2034817864492452e-10


### 5-7-4 오차역전파법을 사용한 학습 구현하기

In [38]:
import sys, os
sys.path.append('/content/drive/MyDrive/AI/독학/밑시딥/dataset')
import numpy as np
from mnist import load_mnist

# 데이터 읽기
# Load the data and build the network.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# History: loss per iteration, accuracies per reporting step.
train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
for i in range(iters_num):
    # Draw a random mini-batch.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]

    # Gradients via backpropagation.
    grad = network.gradient(x_batch, t_batch)

    # In-place parameter update.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on the same batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Report accuracy only when i is a multiple of iter_per_epoch.
    if i % iter_per_epoch != 0:
        continue

    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
    print(train_acc, test_acc)

0.10346666666666667 0.1089
0.9047333333333333 0.9083
0.9228166666666666 0.9214
0.9339166666666666 0.9336
0.9449333333333333 0.9452
0.9506833333333333 0.948
0.95565 0.9513
0.9612 0.9582
0.96525 0.9593
0.9657166666666667 0.9607
0.9702 0.963
0.9704666666666667 0.9654
0.9735166666666667 0.9651
0.9746833333333333 0.968
0.9756833333333333 0.9677
0.9773333333333334 0.9674
0.97925 0.9705
