In [6]:
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass caches both operands because the gradient with
    respect to one operand is the upstream gradient times the *other* one.
    """

    def __init__(self):
        # Operands of the most recent forward() call.
        self.x = self.y = None

    def forward(self, x, y):
        """Cache the operands and return their product ``x * y``."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The cached operands are swapped: d(x*y)/dx = y and d(x*y)/dy = x.
        """
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y

In [8]:
# Inputs: unit price, quantity and tax multiplier.
apple, apple_num, tax = 100, 2, 1.1

# Layers
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

print(price)

220.00000000000003


In [14]:
# Backward pass: walk the layers in the reverse order of the forward pass,
# starting from an upstream gradient of 1 at the final price.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple, dapple_num, dtax)

2.2 110.00000000000001 200


In [16]:
class AddLayer:
    """Addition node of a computational graph.

    No state is kept: the local derivative of ``x + y`` is 1 for both
    operands, so the backward pass does not need the forward inputs.
    """

    def __init__(self):
        pass

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``; the upstream gradient goes to both inputs."""
        return dout * 1, dout * 1

In [20]:
# Inputs: unit prices, quantities and tax multiplier.
apple, apple_num = 100, 2
orange, orange_num = 150, 3
tax = 1.1

# Layers
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num + orange * orange_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
orange_price = mul_orange_layer.forward(orange, orange_num)
all_price = add_apple_orange_layer.forward(apple_price, orange_price)
price = mul_tax_layer.forward(all_price, tax)

# Backward pass: visit the layers in the reverse order of the forward pass.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

print(price)
print(dapple_num, dapple, dorange, dorange_num, dtax)

715.0000000000001
110.00000000000001 2.2 3.3000000000000003 165.0 650


In [24]:
class Relu:
    """ReLU activation layer for array inputs.

    The forward pass remembers which elements were non-positive so that the
    backward pass can zero the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean array, True where the last forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element ``<= 0`` set to 0.

        ``x`` must support ``.copy()`` and boolean-mask assignment
        (e.g. a NumPy array); it is not modified.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    # The method was originally misspelled; the old name is kept as an alias
    # so that any existing caller of ``forawrd`` keeps working.
    forawrd = forward

    def backward(self, dout):
        """Return ``dout`` with entries zeroed where the forward input was <= 0.

        Works on a copy so the caller's ``dout`` array is left untouched.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx

In [28]:
import numpy as np

x = np.array([[1.0, -0.5],
              [-2.0, 3.0]])
print(x)

# Boolean mask that is True wherever the element is non-positive.
mask = x <= 0
print(mask)

[[ 1.  -0.5]
 [-2.   3. ]]
[[False  True]
 [ True False]]


In [32]:
class Sigmoid:
    """Sigmoid activation layer, ``y = 1 / (1 + exp(-x))``.

    The forward output is cached because the derivative can be written
    purely in terms of it: ``dy/dx = y * (1 - y)``.
    """

    def __init__(self):
        # Output of the most recent forward() call.
        self.out = None

    def forward(self, x):
        """Apply the sigmoid element-wise, cache the result and return it."""
        denominator = 1 + np.exp(-x)
        self.out = 1 / denominator
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - y) * y`` using the cached forward output."""
        scaled = dout * (1.0 - self.out)
        return scaled * self.out

In [34]:
# Random input vector, weight matrix and bias for a single affine transform.
x = np.random.rand(2)
w = np.random.rand(2, 3)
B = np.random.rand(3)

x.shape  # (2,)
w.shape  # (2, 3)
B.shape  # (3,)

# (2,) . (2, 3) -> (3,), then the bias is added element-wise.
Y = np.dot(x, w) + B

In [40]:
X_dot_W = np.array([[0, 0, 0],
                    [10, 10, 10]])
B = np.array([1, 2, 3])

print(X_dot_W)
# Broadcasting adds the bias vector B to every row of X_dot_W.
print(X_dot_W + B)

[[ 0  0  0]
 [10 10 10]]
[[ 1  2  3]
 [11 12 13]]


In [42]:
# 2x3 example array, used below as an upstream gradient.
dY = np.array([[1, 2, 3],
               [4, 5, 6]])
dY

array([[1, 2, 3],
       [4, 5, 6]])

In [44]:
# Bias gradient: sum dY down axis 0 (over its rows).
dB = dY.sum(axis=0)
dB

array([5, 7, 9])

In [48]:
class Affine:
    """Fully connected (affine) layer computing ``x . W + b``.

    Accepts a 2-D batch ``(N, D)`` as before, and additionally:
      * a 1-D input ``(D,)``, treated as a single sample; and
      * an input with more than two dimensions, whose trailing axes are
        flattened to ``(N, -1)``.
    In both extra cases ``backward`` returns ``dx`` in the original input
    shape. ``W`` and ``b`` are stored by reference, not copied.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        # 2-D view of the last forward input (needed for dW).
        self.x = None
        # Shape of the last forward input as the caller passed it.
        self.original_x_shape = None
        # Parameter gradients, populated by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x . W + b`` and cache ``x`` for the backward pass.

        A 1-D input yields a 1-D output; every other input yields one
        output row per sample.
        """
        x = np.asarray(x)
        self.original_x_shape = x.shape
        if x.ndim == 1:
            # Single sample: promote to a batch of one.
            x = x.reshape(1, -1)
        elif x.ndim > 2:
            # Tensor input: flatten everything except the batch axis.
            x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.W) + self.b
        if len(self.original_x_shape) == 1:
            # Drop the batch axis that was added above.
            out = out[0]
        return out

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input.

        ``dout`` is the gradient w.r.t. the value returned by ``forward``.
        """
        if len(self.original_x_shape) == 1:
            # Mirror the batch-of-one promotion done in forward().
            dout = np.reshape(dout, (1, -1))

        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        if self.x.shape != self.original_x_shape:
            # Hand the gradient back in the shape the caller supplied.
            dx = dx.reshape(self.original_x_shape)
        return dx

In [52]:
# Re-implementation of functions from the previous chapter
def softmax(a):
    """Return the softmax of ``a`` computed along its last axis.

    A 1-D input is normalised as a single vector. For a 2-D batch each row
    is normalised independently, so every row sums to 1.
    The maximum is subtracted before exponentiating to avoid overflow; this
    does not change the result.
    """
    a = np.asarray(a)
    # A 0-d input has no axis to reduce over; fall back to a full reduction.
    axis = -1 if a.ndim else None
    c = np.max(a, axis=axis, keepdims=True)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a, axis=axis, keepdims=True)
    y = exp_a / sum_exp_a
    return y
    
def cross_entropy_error(y, t):
    """Return the mean cross-entropy of predictions ``y`` against targets ``t``.

    ``y`` holds class probabilities, either ``(classes,)`` for one sample or
    ``(batch, classes)``. ``t`` may be integer class labels or a one-hot
    array with the same number of elements as ``y``.
    """
    y = np.asarray(y)
    t = np.asarray(t)
    if y.ndim == 1:
        # Promote a single sample to a batch of one.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot targets have as many elements as y; turn them into label
    # indices so the fancy indexing below picks one probability per sample.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # 1e-7 keeps log() finite when a predicted probability is exactly 0.
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

In [54]:
class SoftmaxWithLoss:
    """Softmax activation followed by cross-entropy loss.

    Relies on the ``softmax`` and ``cross_entropy_error`` functions being
    available in the enclosing namespace.
    """

    def __init__(self):
        self.loss = None  # loss value from the last forward()
        self.y = None     # softmax output from the last forward()
        self.t = None     # targets passed to the last forward()

    def forward(self, x, t):
        """Compute, cache and return the loss for scores ``x`` and targets ``t``."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        Handles one-hot targets (same number of elements as ``y``) and
        integer class labels. ``dout`` is accepted for interface
        compatibility with the other layers; as in the original
        implementation it is not applied to the result.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # One-hot targets: the gradient is simply (y - t) / batch.
            dx = (self.y - self.t) / batch_size
        else:
            # Label targets: subtract 1 at each sample's true-class position.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        return dx

In [62]:
import sys, os
sys.path.append(os.pardir)
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer network: Affine -> Relu -> Affine, with a SoftmaxWithLoss head.

    Parameters live in ``self.params`` (keys ``'W1'``, ``'b1'``, ``'W2'``,
    ``'b2'``). The Affine layers are constructed with those same array
    objects, so in-place updates of ``self.params[...]`` are visible to the
    layers.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create parameters and layers.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Insertion order is the forward order; gradient() walks it reversed.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer except the loss layer; return the scores."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the loss of the network's prediction for ``x`` against ``t``."""
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy(self, x, t):
        """Return the fraction of samples in ``x`` whose argmax prediction matches ``t``.

        ``t`` may be 1-D class labels; any other rank is reduced with
        ``argmax(axis=1)`` (i.e. treated as one-hot rows).
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """Return parameter gradients estimated with the module-level ``numerical_gradient``.

        The lambda ignores its argument: it always re-evaluates the loss
        from the current ``self.params`` arrays.
        NOTE(review): this presumably relies on the imported
        ``numerical_gradient`` perturbing the passed array in place - confirm.
        """
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        """Return parameter gradients computed by backpropagation."""
        # Forward pass so that every layer caches what its backward() needs.
        self.loss(x, t)

        # Backward pass: loss layer first, then the layers in reverse order.
        dout = 1
        dout = self.lastLayer.backward(dout)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # The Affine layers stored their parameter gradients during backward().
        grads = {}
        grads['W1'] = self.layers['Affine1'].dW
        grads['b1'] = self.layers['Affine1'].db
        grads['W2'] = self.layers['Affine2'].dW
        grads['b2'] = self.layers['Affine2'].db
        return grads

In [64]:
# Gradient check: compare backprop gradients against numerical gradients
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# A tiny batch is enough here: only agreement between the two methods matters.
x_batch, t_batch = x_train[:3], t_train[:3]

grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Mean absolute difference per parameter; values near zero mean the
# backprop implementation agrees with the numerical estimate.
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    print(key, str(diff), sep=":")

W1:2.457481698930841e-13
b1:1.003254276529808e-12
W2:8.553409856342778e-13
b2:1.194600016130032e-10


In [70]:
# Training hyper-parameters.
iters_num = 100000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Per-iteration loss and per-epoch accuracies.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Integer number of iterations per epoch. Floor division keeps the
# `i % iter_per_epoch == 0` check below firing once per epoch even when
# train_size is not an exact multiple of batch_size (a float quotient would
# only ever match at i == 0 in that case).
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch of indices (np.random.choice default: with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Gradients via backpropagation.
    grad = network.gradient(x_batch, t_batch)

    # SGD step; the in-place `-=` keeps the arrays the network already holds.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Loss on the same batch, evaluated after the parameter update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, record accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)

0.15465 0.1518
0.9008333333333334 0.903
0.92195 0.9245
0.9300333333333334 0.9294
0.9424833333333333 0.9412
0.9506333333333333 0.9475
0.9564 0.9539
0.9616 0.9576
0.9651666666666666 0.9619
0.9686833333333333 0.9648
0.971 0.9667
0.9727666666666667 0.9656
0.97465 0.9694
0.9750333333333333 0.9682
0.97445 0.9673
0.9785333333333334 0.9714
0.9798 0.9717
0.9811666666666666 0.9716
0.982 0.9726
0.98025 0.9723
0.983 0.9736
0.9837666666666667 0.973
0.98375 0.9722
0.9846666666666667 0.9729
0.9860666666666666 0.974
0.9863166666666666 0.975
0.9873 0.9761
0.9873833333333333 0.9737
0.9878666666666667 0.9749
0.9884833333333334 0.9743
0.9889833333333333 0.9747
0.9886 0.9746
0.9888833333333333 0.9743
0.9904833333333334 0.9745
0.9904333333333334 0.974
0.99085 0.9745
0.9905166666666667 0.974
0.9918333333333333 0.9752
0.9923333333333333 0.975
0.9929833333333333 0.9752
0.9927666666666667 0.9751
0.9929166666666667 0.975
0.9933833333333333 0.974
0.9933333333333333 0.9748
0.9939333333333333 0.9751
0.9944 0.9736
0