## 오차역전파법(Backpropagation)

In [2]:
class AddNode:
    """Addition node of a computational graph: ``out = x + y``."""

    def __init__(self):
        # Declared up front (same as MulNode) so the attributes exist even
        # before the first forward() call.
        self.x = None
        self.y = None

    def forward(self, x, y):
        """Remember both operands and return their sum."""
        self.x = x
        self.y = y
        return self.x + self.y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        The local derivative of ``x + y`` is 1 for both operands, so the
        upstream gradient is passed through to each input.
        """
        dx = dout * 1
        dy = dout * 1
        return dx, dy
    
class MulNode:
    """Multiplication node of a computational graph: ``out = x * y``."""

    def __init__(self):
        # Operands are cached by forward() because backward() needs them.
        self.x = self.y = None

    def forward(self, x, y):
        """Cache both operands and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)``: each gradient is the *other* operand times ``dout``."""
        grad_x = self.y * dout
        grad_y = self.x * dout
        return grad_x, grad_y
        

In [3]:
# Inputs of the shopping example: unit price and quantity per fruit.
apple, apple_num = 100, 2
banana, banana_num = 300, 5
# Multiplier applied to prices by the tax nodes further down.
tax = 1.1

In [4]:
### 흐름
# ### forward
# apple_buy_result = apple_buy.forward(apple_price,apple_num)
# banana_buy_result = banana_buy.forward(banana_price,banana_num)
# apple_total = MulNode(apple_buy_result,tax)
# apple_total_price = apple_total.forward(apple_buy_result,tax)
# banana_total = MulNode(banana_buy,tax)
# banana_total_price = banana_total.forward(banana_buy_result,tax)

# total_price = AddNode()
# total_price_result = total_price.forward(apple_total_price,banana_total_price)
# total_price_result

# ### backward
# total_price.backward(total_price_result)
# print(apple_total_price)
# print(banana_total_price)

In [5]:
# mul_apple_layer = MulNode()
# mul_banana_layer = MulNode()

# apple_price = mul_apple_layer.forward(apple_price, apple_num)
# banana_price = mul_banana_layer.forward(banana_price, banana_num)

# apple_total_price = mul_apple_layer.forward(apple_price,tax)
# banana_total_price = mul_banana_layer.forward(banana_price,tax)

# dout = 1
# dx, dy = mul_apple_layer.backward(dout)

In [6]:
# Two multiplication nodes for the apple branch: one for price * quantity,
# one for the tax multiplication.
mul_apple_layer = MulNode()
mul_apple_tax_layer = MulNode()

In [7]:
# Forward: apple subtotal = unit price * quantity.
apple_price = mul_apple_layer.forward(apple, apple_num)

In [8]:
# Forward: multiply the apple subtotal by the tax factor.
apple_total_price = mul_apple_tax_layer.forward(apple_price, tax)

In [9]:
apple_total_price

220.00000000000003

In [10]:
# Backward through the tax node only, seeded with an upstream gradient of 1.
# The first returned value is the gradient w.r.t. `apple_price` (the subtotal
# fed into this node), not w.r.t. the unit price `apple`, despite its name.
dout = 1
dapple, dtax = mul_apple_tax_layer.backward(dout)

In [11]:
print(dapple)
print(dtax)

1.1
200


In [12]:
# Same two-node setup for the banana branch: price * quantity, then tax.
mul_banana_layer = MulNode()
mul_banana_tax_layer = MulNode()

In [13]:
# Forward: banana subtotal, then the subtotal multiplied by the tax factor.
banana_price = mul_banana_layer.forward(banana,banana_num)
banana_total_price = mul_banana_tax_layer.forward(banana_price,tax)

In [14]:
# Backward through the banana tax node only. `dbanana` is the gradient w.r.t.
# `banana_price` (the subtotal), and `dtax` overwrites the value obtained from
# the apple branch.
dout = 1
dbanana, dtax = mul_banana_tax_layer.backward(dout)

In [15]:
print(dbanana)
print(dtax)

1.1
1500


In [16]:
total_layer = AddNode()

In [17]:
# Forward: grand total = taxed apple price + taxed banana price.
total_layer.forward(apple_total_price,banana_total_price)

1870.0000000000002

In [18]:
# Backward through the addition node: the upstream gradient is passed to both inputs.
total_layer.backward(dout=1)

(1, 1)

In [19]:
print(total_layer.x)
print(total_layer.y)

220.00000000000003
1650.0000000000002


In [20]:
# Second version of the graph: sum both subtotals first, then apply tax once.
# `mul_apple_layer` and `mul_banana_layer` are rebound to fresh nodes here.
mul_apple_layer = MulNode()
mul_banana_layer = MulNode()
add_apple_banana_layer = AddNode()
mul_tax_layer = MulNode()

In [21]:
# Forward, step 1: per-fruit subtotals (unit price * quantity).
apple_price = mul_apple_layer.forward(apple, apple_num)
banana_price = mul_banana_layer.forward(banana, banana_num)

In [22]:
# Forward, step 2: add the two subtotals.
all_price = add_apple_banana_layer.forward(apple_price, banana_price)

In [23]:
# Forward, step 3: multiply the combined subtotal by the tax factor.
total_price = mul_tax_layer.forward(all_price,tax)

In [24]:
# Show every intermediate value of the forward pass.
print(f'apple_price = {apple_price}')
print(f'banana_price = {banana_price}')
print(f'all_price = {all_price}')
print(f'total_price = {total_price}')


apple_price = 200
banana_price = 1500
all_price = 1700
total_price = 1870.0000000000002


In [25]:
### 역전파(backward)

In [26]:
# Backward, step 1: start from d(total_price)/d(total_price) = 1 and go through
# the tax node, yielding gradients w.r.t. `all_price` and `tax`.
dout = 1
dall_price,dtax = mul_tax_layer.backward(dout)

In [27]:
dall_price

1.1

In [28]:
# Backward, step 2: the addition node hands `dall_price` to both subtotals.
dapple_price, dbanana_price = add_apple_banana_layer.backward(dall_price)

In [29]:
dapple_price

1.1

In [30]:
# Backward, step 3 (apple): gradients w.r.t. the unit price and the quantity.
dapple, dapple_num = mul_apple_layer.backward(dapple_price)


In [31]:
print(dapple)
print(dapple_num)

2.2
110.00000000000001


In [32]:
# Backward, step 3 (banana): gradients w.r.t. the unit price and the quantity.
dbanana, dbanana_num = mul_banana_layer.backward(dbanana_price)

In [33]:
print(dbanana)
print(dbanana_num)

5.5
330.0


## Relu, Sigmoid, Affine, Loss

In [34]:
import numpy as np

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Accepts a scalar or array-like. The exponential is only ever taken of a
    non-positive number, so large-magnitude negative inputs no longer overflow
    (``np.exp(-x)`` blows up to ``inf`` with a RuntimeWarning for x << 0).
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))  # always in (0, 1], never overflows
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same function.
    out = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # real arrays untouched, so scalar input still yields a scalar.
    return out[()]

def relu(x):
    """Return ``max(0, x)`` element-wise (rectified linear unit)."""
    rectified = np.maximum(0, x)
    return rectified

def softmax(x):
    """Return the softmax of ``x`` along its last axis.

    Works for a single vector, a 2-D batch (one distribution per row, as
    before) or any higher-rank array. The maximum along the last axis is
    subtracted first so that ``np.exp`` cannot overflow; this does not change
    the result.
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / np.sum(exps, axis=-1, keepdims=True)

def categorical_crossentropy(y, t, eps=1e-12):
    """Return the cross-entropy between predictions ``y`` and targets ``t``.

    Args:
        y: predicted probabilities.
        t: target array combined element-wise with ``y``.
        eps: lower bound applied to ``y`` before the logarithm. Without it a
            probability of exactly 0 gives ``log(0) = -inf`` and the product
            ``0 * -inf`` turns the whole loss into NaN.

    Returns:
        The mean of ``-t * log(y)`` taken over *all* elements (no axis is
        given to ``np.mean``, so the divisor is the total element count).
    """
    safe_y = np.clip(y, eps, None)
    return np.mean(-t * np.log(safe_y))

In [35]:
### Relu, Sigmoid, Affine, Loss

class Relu:
    """ReLU layer: passes positive inputs through and zeroes the rest."""

    def __init__(self):
        # Boolean array marking the positions where the forward input was <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry ``<= 0`` set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with the gradient blocked where the input was ``<= 0``.

        The result is a new array: the caller's ``dout`` is left untouched
        (it used to be overwritten in place).
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx
    
class Sigmoid:
    """Sigmoid activation layer built on the module-level ``sigmoid`` helper."""

    def __init__(self):
        # Output of the last forward pass; backward() derives the gradient from it.
        self.out = None

    def forward(self, x):
        """Apply ``sigmoid`` to ``x``, cache the result and return it."""
        self.out = sigmoid(x)
        return self.out

    def backward(self, dout):
        """Return ``dout`` scaled by the local derivative ``(1 - out) * out``."""
        local_grad = (1 - self.out) * self.out
        return local_grad * dout
    
class Affine:
    """Fully connected layer: ``out = x . W + b``.

    ``np.dot`` contracts the last axis of ``x`` with the first axis of ``W``,
    so ``x`` may be a single vector, a 2-D batch or a higher-rank array whose
    last axis holds the features.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None             # input of the last forward pass
        self.origin_shape = None  # shape of that input, restored on dx
        self.dW = None            # gradient w.r.t. W, set by backward()
        self.db = None            # gradient w.r.t. b, set by backward()

    def forward(self, x):
        """Cache ``x`` (and its shape) and return ``np.dot(x, W) + b``."""
        self.origin_shape = x.shape
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        """Return ``(dx, dW, db)`` for the upstream gradient ``dout``.

        ``dW`` and ``db`` are also stored on the instance. All leading axes of
        the cached input and of ``dout`` are flattened into one sample axis
        before accumulating, which gives ``dW`` the shape of ``W`` and ``db``
        the shape of ``b`` for 1-D and N-D inputs as well; for 2-D inputs the
        flattening is a no-op and the result is the same as before.
        """
        dout = np.asarray(dout)
        dx = np.dot(dout, self.W.T).reshape(self.origin_shape)

        x_2d = self.x.reshape(-1, self.x.shape[-1])
        dout_2d = dout.reshape(-1, dout.shape[-1])
        self.dW = np.dot(x_2d.T, dout_2d)
        self.db = np.sum(dout_2d, axis=0)
        return dx, self.dW, self.db
    
class Loss:
    """Loss layer wrapping the module-level ``categorical_crossentropy``.

    NOTE(review): ``backward`` returns ``y - t`` without any division, while
    ``forward`` reports a mean; the two scales differ. ``y - t`` looks like
    the softmax-with-cross-entropy gradient w.r.t. the pre-softmax scores --
    confirm that this is the intended contract.
    """

    def __init__(self):
        # Last loss value plus the prediction/target pair it was computed from.
        self.loss = self.y = self.t = None

    def forward(self, y, t):
        """Cache ``y`` and ``t``, compute the loss, store it and return it."""
        self.y, self.t = y, t
        self.loss = categorical_crossentropy(y, t)
        return self.loss

    def backward(self, dout=1):
        """Return ``(y - t) * dout`` using the values cached by ``forward``."""
        return (self.y - self.t) * dout

In [36]:
### Loss

In [37]:
# Fake predictions: softmax over random scores, 10 rows x 3 columns.
y = softmax(np.random.randn(10,3))

In [38]:
# Random scores used only to pick a label per row (argmax in the next cell);
# `t` is replaced by a zero array afterwards.
t = np.random.randn(10,3)

In [39]:
# Column index of the largest score in each row -> one label per row.
x = np.argmax(t, axis=1)

In [40]:
# Start the one-hot target matrix from all zeros.
t = np.zeros((10,3))

In [41]:
# One-hot encode: in row i, set the column given by label x[i] to 1.
t[np.arange(len(x)), x] = 1

In [42]:
t

array([[0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])

In [43]:
loss = Loss()

In [44]:
# Loss of the random predictions `y` against the one-hot targets `t`.
loss.forward(y,t)

0.4467397593773725

In [45]:
# Gradient returned by the loss layer with the default upstream gradient of 1.
loss.backward()

array([[ 0.09179273,  0.37997585, -0.47176858],
       [ 0.26314346, -0.86044518,  0.59730172],
       [ 0.15898581,  0.09182098, -0.25080679],
       [-0.76920511,  0.2528827 ,  0.51632241],
       [ 0.55685033, -0.78211035,  0.22526002],
       [-0.44170283,  0.40343413,  0.0382687 ],
       [ 0.60360181, -0.77511555,  0.17151374],
       [ 0.06443593,  0.9195728 , -0.98400872],
       [ 0.11116678,  0.12398089, -0.23514766],
       [ 0.40199243,  0.24352868, -0.64552111]])

In [46]:
### Affine
# NOTE(review): this `x` is not passed to the Affine layer in the cells that
# follow (they generate fresh random input) -- possibly a leftover.
x = np.random.randn(5,3)

In [47]:
x.shape

(5, 3)

In [48]:
# Layer with a random (5, 3) weight matrix and a zero bias of length 3.
Affine1 = Affine(np.random.randn(5,3), np.zeros(3))

In [49]:
# Forward with a random (10, 5) input; the result has shape (10, 3).
Affine1.forward(np.random.randn(10,5))

array([[-2.63090643,  0.68956378,  1.31813763],
       [ 2.10518517,  5.87112935,  7.18932288],
       [ 1.33635876, -0.36513064, -1.11591854],
       [-2.64766777, -1.40175049, -2.00041305],
       [-0.70559529, -2.04566188, -3.44760226],
       [ 0.90943562,  0.66384302, -0.15792078],
       [ 0.31740652,  1.34774451,  2.23343871],
       [-1.86961775,  0.64238883,  1.4028191 ],
       [-1.83042134,  0.83924837,  1.75027077],
       [ 2.21539459,  0.87230628,  0.30926659]])

In [50]:
# Backward with a random (10, 3) upstream gradient; returns (dx, dW, db).
Affine1.backward(np.random.randn(10,3))

(array([[-2.69196838, -0.46197011,  1.92793711, -0.45324269, -0.40030777],
        [-0.98071876, -1.39884737, -1.04416644,  1.85949241,  1.0247664 ],
        [ 0.80803602, -1.19693151, -2.38291547,  2.26697361,  1.41127749],
        [ 1.18069562,  1.4972208 ,  1.89908576, -2.591407  , -0.85175898],
        [-1.47489614,  0.05220074,  1.23918945, -0.56840663, -0.56612962],
        [-3.70103194, -3.02399912, -1.94995174,  4.18778083,  1.44978144],
        [ 2.94261411,  2.87046178,  2.19786019, -4.08629221, -1.59930356],
        [ 3.4957845 , -1.18563612, -5.71412808,  4.01845104,  2.06619647],
        [ 0.9431248 ,  0.17508733, -0.71870632,  0.1821647 ,  0.11368873],
        [ 3.14362002,  0.47069881, -1.6503601 ,  0.13402762,  0.69015966]]),
 array([[ 1.98634406,  1.64852775,  1.37577383],
        [ 3.77198861,  0.4547168 ,  0.25803468],
        [ 0.81268599,  3.8457226 , -0.62262711],
        [ 9.59820683,  1.94855397,  3.07705193],
        [-5.72467085,  1.7504034 , -0.57453625]]),
 

In [51]:
### Relu
# `x` is the layer input and `y` is used further down as the upstream gradient.
x = np.array([[-1,0],[1,2],[2,0]])
y = np.array([[7,6],[-2,5],[4,1]])

In [52]:
# ReLU by hand: a boolean mask zeroes the non-positive entries of x in place.
x[x <= 0] = 0
x

array([[0, 0],
       [1, 2],
       [2, 0]])

In [53]:
# Same operation through the layer class. `x` was already clamped in the
# previous cell, so the output equals `x`.
relu_d = Relu()
relu_d.forward(x)

array([[0, 0],
       [1, 2],
       [2, 0]])

In [54]:
# Feed `y` in as the upstream gradient: positions where the forward input was
# <= 0 come back as 0, the others keep their value from `y`.
relu_d.backward(y)

array([[ 0,  0],
       [-2,  5],
       [ 4,  0]])

In [55]:
### Sigmoid
# Layer instance used by the next two cells.
xx = Sigmoid()

In [56]:
# Forward with a random (2, 3) input; the layer caches this output for backward().
xx.forward(np.random.randn(2,3))

array([[0.88878197, 0.20736302, 0.38558998],
       [0.83827747, 0.30434422, 0.68464343]])

In [57]:
# Backward with a random (2, 3) upstream gradient, scaled by (1 - out) * out.
xx.backward(np.random.randn(2,3))

array([[ 0.08034092,  0.1189563 , -0.07759364],
       [-0.07579505, -0.2286068 , -0.12595433]])