## 5.4 단순한 계층 구현하기

### 5.4.1 곱셈 계층

In [20]:
class MulLayer:
    """Multiplication node of a computational graph.

    ``forward`` caches both operands so that ``backward`` can scale the
    upstream gradient by the opposite operand.
    """

    def __init__(self):
        # Operands of the most recent forward pass (unset until then).
        self.x, self.y = None, None

    def forward(self, x, y):
        """Remember ``x`` and ``y`` and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Return ``(dx, dy)`` for the upstream gradient ``dout``.

        For ``out = x * y`` the partial derivatives are ``y`` and ``x``,
        so each gradient uses the *other* cached operand.
        """
        return dout * self.y, dout * self.x

In [21]:
# Inputs of the example: unit price, quantity and tax multiplier.
apple = 100
apple_num = 2
tax = 1.1

# Layers
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num)
price = mul_tax_layer.forward(apple_price, tax)

price

220.00000000000003

In [22]:
# Backward pass: call backward() on the layers in the reverse of the forward order,
# starting from an upstream gradient of 1.
dprice = 1
dapple_price, dtax = mul_tax_layer.backward(dprice)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

dapple, dapple_num, dtax

(2.2, 110.00000000000001, 200)

In [23]:
dapple_price

1.1

In [24]:
dtax

200

### 5.4.2 Addition layer

In [25]:
class AddLayer:
    """Addition node of a computational graph; it keeps no state."""

    def __init__(self):
        """Nothing to initialise: addition needs no cached inputs."""

    def forward(self, x, y):
        """Return the sum ``x + y``."""
        return x + y

    def backward(self, dout):
        """Return ``(dx, dy)``; each is ``dout * 1``.

        Both partial derivatives of ``x + y`` are 1, so the upstream
        gradient is handed to both inputs.
        """
        grad_x, grad_y = dout * 1, dout * 1
        return grad_x, grad_y

In [26]:
# Inputs of the example: unit prices, quantities and tax multiplier.
apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

# Layers
mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_tax_layer = MulLayer()

# Forward pass: (apple * apple_num + orange * orange_num) * tax
apple_price = mul_apple_layer.forward(apple, apple_num) #(1)
orange_price = mul_orange_layer.forward(orange, orange_num) #(2)
all_price = add_apple_orange_layer.forward(apple_price, orange_price) #(3)
price = mul_tax_layer.forward(all_price, tax) #(4)

# Backward pass: visit the layers in the reverse of the forward order.
dprice = 1
dall_price, dtax = mul_tax_layer.backward(dprice)
print(dall_price)
print(dtax)

dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)
print(dapple_price)
print(dorange_price)

dorange, dorange_num = mul_orange_layer.backward(dorange_price)
dapple, dapple_num = mul_apple_layer.backward(dapple_price)

price

1.1
650
1.1
1.1


715.0000000000001

### 5.5.1 ReLU Layer

In [27]:
class Relu:
    """ReLU activation layer operating on NumPy arrays.

    ``forward`` records which input elements were ``<= 0``; ``backward``
    blocks the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean mask from the last forward pass: True where x <= 0.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element ``<= 0`` set to 0.

        ``x`` itself is left untouched; the mask is stored for ``backward``.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return ``dout`` with the masked positions zeroed.

        Works on a copy so the caller's ``dout`` array is not modified;
        the upstream gradient may still be needed elsewhere.
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx
    
    

In [29]:
import numpy as np
x = np.array([[1.0, -0.5], [-2.0, 3.0]])
x

array([[ 1. , -0.5],
       [-2. ,  3. ]])

In [30]:
# Boolean array with the same shape as x: True where the element is <= 0.
mask = (x <= 0)
mask

array([[False,  True],
       [ True, False]])

In [32]:
# Boolean-mask assignment: overwrite, in place, every element selected by mask.
x[mask] = 0

In [33]:
x[mask] 

array([0., 0.])

In [34]:
x

array([[1., 0.],
       [0., 3.]])

### 5.5.2 Sigmoid Layer

In [35]:
class Sigmoid:
    """Sigmoid activation layer, ``1 / (1 + exp(-x))``.

    The forward output is cached because the backward pass is expressed
    entirely in terms of it.
    """

    def __init__(self):
        # Output of the most recent forward pass (unset until then).
        self.out = None

    def forward(self, x):
        """Compute the sigmoid of ``x``, cache it and return it."""
        self.out = 1 / (1 + np.exp(-x))
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached output."""
        complement = 1.0 - self.out
        return dout * complement * self.out

### 5.6.1 Affine Layer

In [36]:
X = np.random.rand(2) # input
W = np.random.rand(2,3) # weights
B = np.random.rand(3) # bias

X.shape # (2, )

(2,)

In [37]:
W.shape # (2, 3)

(2, 3)

In [38]:
B.shape # (3,)

(3,)

In [40]:
# Affine transform of the input: product of X with W, then add the bias B.
Y = np.dot(X, W) + B
Y

array([1.17393685, 1.94401368, 1.36498402])

array([[0.85725796, 0.98009064, 0.58849679],
       [0.35985167, 0.66866934, 0.19889952]])

### 5.6.2 Affine Layer for batch calculation

In [42]:
# Hand-written 2x3 array standing in for the result of np.dot(X, W),
# presumably one row per sample of a batch, plus a 3-element bias.
X_dot_W = np.array([[0, 0, 0], [10, 10, 10]])
B = np.array([1, 2, 3])

X_dot_W

array([[ 0,  0,  0],
       [10, 10, 10]])

In [43]:
# Broadcasting: the 3-element B is added to every row of the 2x3 array.
X_dot_W +B

array([[ 1,  2,  3],
       [11, 12, 13]])

In [44]:
# Example 2x3 array, presumably standing in for the gradient arriving at the layer output.
dY = np.array([[1, 2, 3], [4, 5, 6]])
dY

array([[1, 2, 3],
       [4, 5, 6]])

In [45]:
# Sum over axis 0 (down the rows), leaving one total per column.
dB = np.sum(dY, axis = 0)
dB

array([5, 7, 9])

In [46]:
class Affine:
    """Affine (fully connected) layer computing ``np.dot(x, W) + b``.

    ``forward`` caches its input; ``backward`` stores the parameter
    gradients in ``dW`` and ``db`` and returns the gradient with respect
    to the input.
    """

    def __init__(self, W, b):
        self.W = W
        self.b = b
        # Input of the most recent forward pass.
        self.x = None
        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(x, W) + b``.

        ``x`` may be a single 1-D sample or a 2-D batch with one sample
        per row.
        """
        self.x = x
        out = np.dot(x, self.W) + self.b

        return out

    def backward(self, dout):
        """Store ``dW`` and ``db`` and return the gradient w.r.t. ``x``.

        ``dout`` is the upstream gradient with the same shape as the
        output of ``forward``. The returned ``dx`` has the shape of ``x``.
        """
        dx = np.dot(dout, self.W.T)

        # Treat a single 1-D sample as a batch of one so that dW keeps the
        # shape of W and db keeps the shape of b. Arrays that are already
        # 2-D (or higher) pass through atleast_2d unchanged.
        x_2d = np.atleast_2d(self.x)
        dout_2d = np.atleast_2d(dout)
        self.dW = np.dot(x_2d.T, dout_2d)
        self.db = np.sum(dout_2d, axis=0)

        return dx

### 5.6.3 Softmax-with-Loss Layer

In [47]:
class SoftmaxWithLoss:
    """Softmax activation combined with a cross-entropy loss.

    ``forward`` relies on ``softmax`` and ``cross_entropy_error``, which
    must be defined or imported elsewhere in the module.
    """

    def __init__(self):
        self.loss = None  # loss value from the last forward pass
        self.y = None     # output of softmax
        self.t = None     # target labels

    def forward(self, x, t):
        """Store ``t`` and ``softmax(x)``, then return the loss between them."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return ``dout * (y - t) / batch_size``.

        ``batch_size`` is the length of the first axis of ``t``.
        ``dout`` scales the result; with the default of 1 the gradient is
        simply ``(y - t) / batch_size``.
        NOTE(review): the subtraction assumes ``t`` has the same shape as
        ``y`` (e.g. one-hot targets) -- confirm against callers.
        """
        batch_size = self.t.shape[0]
        dx = dout * (self.y - self.t) / batch_size

        return dx