back propagation code of basic calculation and activation functions

In [118]:
import numpy as np
import sys, os
sys.path.append(os.pardir)
from mnist import load_mnist
from collections import OrderedDict

In [119]:
def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T
    
    c = np.max(x)
    exp_x = np.exp(x-c)
    sum_exp_x = np.sum(exp_x)
    y = exp_x / sum_exp_x
    
    return y

In [120]:
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    
    batch_size = y.shape[0]
    return -np.sum(t*np.log(y + 1e-7)) / batch_size

In [121]:
def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x) # f(x+h)
        
        x[idx] = tmp_val - h 
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        
        x[idx] = tmp_val # 값 복원
        it.iternext()   
        
    return grad

In [122]:
class Relu:
    def __init__(self):
        self.mask = None
        
    def forward(self, x):
        self.mask = (x<=0)
        out = x.copy()
        out[self.mask] = 0
        
        return out
    
    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        
        return dx

In [123]:
class Sigmoid:
    def __init__(self):
        self.out = None
        
    def forward(self, x):
        out = 1/(1+np.exp(-x))
        self.out = out
        
        return out
    
    def backward(self, dout):
        dx = dout*self.out*(1-self.out)
        
        return dx
        
        

In [124]:
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.db = None
        self.dW = None
        
    def forward(self, x):
        self.x = x
        out = np.dot(x, self.W) + self.b
        
        return out
    
    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.db = np.sum(dout, axis = 0)
        self.dW = np.dot(self.x.T, dout)
        
        return dx
        

In [125]:
class softmaxwithloss:
    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None
        
    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        
        return self.loss
    
    def backward(self, dout=1): # 왜 batch_size로 나눠야 하는지 모르겠음.
        batch_size =self.t.shape[0]
        dx = (self.y - self.t) / batch_size
        
        return dx

In [126]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        self.params={}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        
        self.lastLayer = softmaxwithloss()
        
    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
            
        return x
    
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)
    
    def numerical_gradient(self, x, t): #calculate the gradient value for W1, b1, W2, b2 respectively.
        def loss_W(W):
            return self.loss(x,t) #loss_W = lambda W: self.loss(x,t) instead.
        
        grads={}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads
    
    def gradient(self, x, t):
        self.loss(x, t) # forward propagation
        
        dout = 1 # backward propagation
        dout = self.lastLayer.backward(dout)
        
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        
        grads={}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        
        return grads

In [128]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label = True)
# x_train.shape is (60000, 784)
network = TwoLayerNet(input_size = 784, hidden_size = 50, output_size = 10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
   
    x_batch = x_train[batch_mask]
    # x_batch.shape is (100, 784)
    t_batch = t_train[batch_mask]
    
    grad = network.gradient(x_batch, t_batch)
    
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate*grad[key]
        
    loss = network.loss(x_batch, t_batch)
    print(loss)

2.3016935511489827
2.299797947199496
2.3010886375209383
2.300594626352846
2.3000264589929924
2.299691302820746
2.298664033410893
2.298739024573268
2.2973938220465873
2.2968437511271347
2.2970967449680515
2.2956207101531363
2.2959201126075426
2.2967856391479895
2.2948093225364903
2.2926379454401937
2.2933722722786865
2.286107455353818
2.2892963340153063
2.2935942428045233
2.2892104203415
2.28778746525948
2.2886212876572234
2.2882612208732014
2.2863430167202314
2.2852443471069765
2.2814566786745547
2.2797239678952828
2.275751733013635
2.277034112257711
2.2684042636496535
2.277328621054668
2.2699779616457714
2.2706886213571664
2.2621414025050983
2.2587624007149048
2.261140333245568
2.2555203075684953
2.2525112845319826
2.2473011740456683
2.2427726861823287
2.240880184791932
2.2317872289203593
2.218900805368165
2.2257895242325647
2.213432242699455
2.2049887449159726
2.2047621938283197
2.1836561905190637
2.1718522784953938
2.1686646772446534
2.164156043349068
2.134108716204599
2.13345687366

0.27963537871094624
0.3688099997817431
0.30534377996262085
0.32684968556743316
0.23932281672284814
0.3212200731401938
0.2435921162361914
0.31993784900606187
0.32867445524384437
0.38041219362838957
0.23864671258387346
0.19907399989777885
0.2336106160639681
0.3681807068633019
0.22252656747684946
0.415176420104561
0.4161717370260394
0.31016400321051774
0.28221386312955443
0.43213950963446834
0.3464852940721543
0.32532159320502047
0.18663751174892235
0.606620131552328
0.36994710813558357
0.21377548178781017
0.3097223779646574
0.2992606957707902
0.37023080773895567
0.22904231902693467
0.2839354452246194
0.4035672577066362
0.29651390805857003
0.31546815102138337
0.35857989490379777
0.26336098394653296
0.27649082350078297
0.4568207791165102
0.2655841211811295
0.36085950402597833
0.26696543630792285
0.4455406686207299
0.24672600487376734
0.2662237296924588
0.38478649048923474
0.33727670023493345
0.34904492693985445
0.3950653184054604
0.29336578086158627
0.31876432621887835
0.39369895427529045


0.22820591112402863
0.1774052770732358
0.22329353787151365
0.28912102063341444
0.2964951423931573
0.1976850730027183
0.1848817206280632
0.4366865811853553
0.21377545312635815
0.2950659100425414
0.2779098296132691
0.23212201505911317
0.27335335772024716
0.4826287227747696
0.3205889208348763
0.3804586176880843
0.21521103143565198
0.2584211668589106
0.31743258518995104
0.3644468795095701
0.32615861324305334
0.18037231727274602
0.21617602128117344
0.24233539949003824
0.22354063929313
0.36567147127559596
0.2734469641098051
0.4453928705373326
0.5119526428736354
0.24171112487670673
0.24159930518319023
0.3481629709875159
0.23497950727716435
0.18153994846651517
0.3177398058811496
0.2303764118088872
0.5554460531891544
0.2795764858695306
0.25899707618677686
0.29893650852865034
0.3954839090127271
0.14653793219700584
0.42514755246731367
0.31020408322564946
0.303131922258849
0.44452310374063025
0.17118522239079625
0.5341103930429635
0.2094859629362347
0.3218168730812313
0.25774973085179725
0.1885534

0.25869957657516224


KeyboardInterrupt: 