# 오차역전파

In [1]:
# Forward/backward-pass classes for the activation functions (ReLU, Sigmoid) and the Affine / Softmax-with-loss layers

import numpy as np
from functions import *

class Relu:
    """ReLU activation layer with a forward and a backward pass.

    The forward pass zeroes every element of the input that is <= 0 and
    remembers where those elements were; the backward pass zeroes the
    upstream gradient at the same positions.
    """

    def __init__(self):
        # Boolean array marking the inputs that were <= 0 in the most recent
        # forward pass; None until forward() has been called.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with all elements <= 0 replaced by 0.

        Args:
            x: NumPy array of layer inputs. It is not modified.

        Returns:
            A new array of the same shape as ``x``.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    # The method was originally published under the misspelled name `foward`;
    # keep that name working for any caller that already uses it.
    foward = forward

    def backward(self, dout):
        """Propagate the upstream gradient through the ReLU.

        Args:
            dout: NumPy array of upstream gradients, same shape as the input
                given to the last forward() call.

        Returns:
            A new array equal to ``dout`` with zeros wherever the forward
            input was <= 0. ``dout`` itself is left untouched.
        """
        # Work on a copy so the caller's gradient array is not clobbered.
        dx = dout.copy()
        dx[self.mask] = 0

        return dx
    
class Sigmoid:
    """Sigmoid activation layer with a forward and a backward pass.

    The forward output is cached because the backward pass is expressed
    entirely in terms of it.
    """

    def __init__(self):
        # Output of the most recent forward pass; None until forward() runs.
        self.out = None

    def forward(self, x):
        """Apply ``sigmoid`` to ``x``, cache the result and return it.

        NOTE(review): ``sigmoid`` is not defined in this block; it is expected
        to be provided by the ``functions`` star-import - confirm.
        """
        self.out = sigmoid(x)
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached forward output.

        Args:
            dout: Upstream gradient.

        Returns:
            Gradient with respect to the forward input.
        """
        y = self.out
        return dout * (1 - y) * y
    
class Affine:
    """Fully connected layer computing ``x . W + b``.

    Inputs with more than two dimensions are flattened to
    ``(x.shape[0], -1)`` for the matrix product, and the gradient returned
    by backward() is reshaped back to the original input shape.
    """

    def __init__(self, W, b):
        """Store the weight matrix ``W`` and bias ``b`` (kept by reference)."""
        self.W = W
        self.b = b

        # Flattened input and original input shape from the last forward pass.
        self.x = None
        self.original_x_shape = None

        # Parameter gradients filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x . W + b`` after flattening ``x`` to two dimensions.

        Args:
            x: NumPy array whose first axis is kept and whose remaining axes
                are flattened together.

        Returns:
            The result of ``np.dot(x_flat, W) + b``.
        """
        self.original_x_shape = x.shape
        # Keep the first axis, collapse everything else into one axis.
        x = x.reshape(x.shape[0], -1)
        self.x = x
        out = np.dot(self.x, self.W) + self.b
        return out

    def backward(self, dout):
        """Compute parameter gradients and return the input gradient.

        Stores ``dW = x.T . dout`` and ``db = sum(dout, axis=0)`` on the
        instance.

        Args:
            dout: Upstream gradient with the same shape as the forward output.

        Returns:
            ``dout . W.T`` reshaped to the shape of the original forward input.
        """
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        # Restore the shape of the input as it was passed to forward().
        dx = dx.reshape(*self.original_x_shape)

        return dx
    
class SoftmaxWithLoss:
    """Output layer combining softmax with a cross-entropy loss.

    NOTE(review): ``softmax`` and ``cross_entropy_error`` are not defined in
    this block; they are expected to come from the ``functions`` star-import.
    """

    def __init__(self):
        self.loss = None  # loss value from the last forward pass
        self.y = None     # softmax output from the last forward pass
        self.t = None     # target labels from the last forward pass

    def forward(self, x, t):
        """Compute and return the loss for scores ``x`` and targets ``t``.

        Args:
            x: Input scores passed to ``softmax``.
            t: Target labels, either one-hot (same size as the softmax
                output) or an array of class indices.

        Returns:
            The value returned by ``cross_entropy_error(y, t)``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the input scores.

        Args:
            dout: Upstream gradient. Accepted for interface consistency with
                the other layers; it is not used in the computation.

        Returns:
            ``(y - t) / batch_size``, where ``t`` is treated as one-hot. When
            ``t`` holds class indices, 1 is subtracted from ``y`` at each
            sample's target index instead.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:  # targets are one-hot encoded
            dx = (self.y - self.t) / batch_size
        else:
            # Targets are class indices: subtract 1 at each sample's label.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx

In [15]:
# Two layer deep learning architecture 구현하기

import numpy as np
from layers import *
from gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, followed by a
    softmax-with-loss output layer.

    Parameters live in ``self.params`` under the keys 'W1', 'b1', 'W2', 'b2';
    the same array objects are handed to the Affine layers.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create randomly initialised weights, zero biases and the layers.

        Args:
            input_size: Number of rows of W1.
            hidden_size: Number of columns of W1 / rows of W2.
            output_size: Number of columns of W2.
            weight_init_std: Scale applied to the ``np.random.randn`` weights.
        """
        # Parameter initialisation (weights random, biases zero).
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Layers in forward order; an OrderedDict so the order is explicit.
        self.layers = OrderedDict([
            ('Affine1', Affine(w1, b1)),
            ('Relu', Relu()),
            ('Affine2', Affine(w2, b2)),
        ])

        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the output-layer loss for inputs ``x`` and targets ``t``."""
        scores = self.predict(x)
        return self.lastlayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max prediction
        equals the target.

        ``t`` may be a 1-D array of class indices or a one-hot array, which
        is reduced with arg-max along axis 1.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(predicted == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return gradients for all parameters via the external
        ``numerical_gradient`` function.

        The call inside this method resolves to the module-level function of
        the same name, not to this method.
        """
        def loss_W(W):
            # The argument is ignored; the loss is evaluated on (x, t).
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return gradients for all parameters via backpropagation.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2' holding the ``dW``/``db``
            values stored on the two Affine layers.
        """
        # Forward pass, so every layer caches what its backward pass needs.
        self.loss(x, t)

        # Backward pass: output layer first, then the layers in reverse order.
        dout = self.lastlayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        first = self.layers['Affine1']
        second = self.layers['Affine2']
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }

In [17]:
## 학습 및 실행

import numpy as np
from mnist import load_mnist

# Load MNIST with normalised inputs and one-hot labels.
(x_train, t_train), (x_test, t_test)=load_mnist(normalize=True, one_hot_label=True)

network=TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters.
iters_num=10000
train_size=x_train.shape[0]
batch_size=100
learning_rate=0.1

# Per-iteration loss and per-epoch accuracies.
train_loss_list=[]
train_acc_list=[]
test_acc_list=[]

# Number of iterations per epoch. Integer division keeps this an int so the
# `i % iter_per_epoch == 0` check below also fires when train_size is not an
# exact multiple of batch_size (a float period would only match at i == 0).
iter_per_epoch=max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of sample indices.
    batch_mask=np.random.choice(train_size, batch_size)
    x_batch=x_train[batch_mask]
    t_batch=t_train[batch_mask]

    # Compute the gradients with backpropagation.
    grad=network.gradient(x_batch, t_batch)

    # Update the parameters; `-=` modifies the stored arrays in place.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key]-=learning_rate*grad[key]

    # Record the mini-batch loss after the update.
    loss =network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc=network.accuracy(x_train, t_train)
        test_acc=network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print(train_acc, test_acc)
        
    
    
    
    

0.1379 0.1422
0.9008833333333334 0.9046
0.9211666666666667 0.9253
0.9340666666666667 0.9335
0.9440833333333334 0.9427
0.9514666666666667 0.9489
0.9565166666666667 0.9545
0.9610166666666666 0.9581
0.9643166666666667 0.9603
0.9677 0.962
0.9695833333333334 0.9615
0.9722 0.9664
0.9733 0.9689
0.97525 0.9682
0.97585 0.9691
0.97795 0.9688
0.9778166666666667 0.9698


In [22]:
# Show the trained 'b2' parameter (the bias stored under params['b2']).
print(network.params['b2'])

[-0.39568315  0.51556343 -0.1132285  -0.23007895  0.09420642  0.40699907
 -0.04150895  0.11624122 -0.31065433 -0.04185625]
