## 수치 미분을 이용한 심층 신경망 학습

### without back propagation

## Import modules

In [49]:
import time
import numpy as np

## 유틸리티 함수

In [50]:
# Step size for the forward finite-difference gradient estimates below.
epsilon = 1e-4

def _t(x): ## transpose
    return np.transpose(x)

def _m(A, B):  ## 행렬곱
    return np.matmul(A, B)

def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise.

    Accepts a scalar or a NumPy array. For very negative inputs ``exp(-x)``
    overflows to ``inf`` (NumPy may emit a RuntimeWarning) and the result
    is 0.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def mean_squared_error(h, y):
    """Return half of the mean squared difference between ``h`` and ``y``.

    Args:
        h: Predicted values.
        y: Target values, broadcastable against ``h``.

    Returns:
        ``0.5 * mean((h - y) ** 2)`` taken over all elements.
    """
    residual = h - y
    return 0.5 * np.mean(np.square(residual))

## Dense Layer 구현

In [51]:
class Dense:
    """Fully connected layer that computes ``a(W^T x + b)``.

    Attributes set by the constructor:
        W: Weight array; ``__call__`` multiplies its transpose by the input.
        b: Bias added to the product.
        a: Activation function applied to the result.
        dW, db: Zero-filled gradient buffers shaped like ``W`` and ``b``.
    """

    def __init__(self, W, b, a):
        # Parameters and activation are stored by reference, not copied.
        self.W, self.b, self.a = W, b, a

        # Gradient slots: np.zeros_like yields the same shape and dtype as
        # the parameter each one mirrors.
        self.db = np.zeros_like(b)
        self.dW = np.zeros_like(W)

    def __call__(self, x):
        """Apply the layer to ``x`` and return the activated output."""
        # Shapes per the original note: (in x out)^T @ (in x 1) + (out x 1).
        pre_activation = _m(_t(self.W), x) + self.b
        return self.a(pre_activation)

## 심층신경망 구현

- hidden_depth = hidden layer의 개수
- num_neuron = hidden layer 하나당 뉴런의 개수
- num_input = 입력 벡터의 차원(input의 개수)
- num_output = 출력 벡터의 차원(output의 개수)

In [52]:
class DNN:
    """Feed-forward stack of ``Dense`` layers trained with numerical gradients.

    ``self.sequence`` holds the layers in forward order: one input-to-hidden
    layer, ``hidden_depth - 1`` hidden-to-hidden layers, and one output
    layer. Every layer, including the output layer, uses ``activation``.
    """

    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
        """Create the layers with small random weights and zero biases.

        Args:
            hidden_depth: Number of hidden layers. The first hidden layer is
                always created; values below 1 simply add no further ones.
            num_neuron: Number of units in each hidden layer.
            num_input: Length of the input vector.
            num_output: Length of the output vector.
            activation: Callable applied by every layer.
        """
        # (fan_in, fan_out) of each layer, in forward order.
        layer_shapes = [(num_input, num_neuron)]
        layer_shapes.extend((num_neuron, num_neuron) for _ in range(hidden_depth - 1))
        layer_shapes.append((num_neuron, num_output))

        self.sequence = []
        for fan_in, fan_out in layer_shapes:
            # Weights ~ N(0, 0.01) with shape (fan_in, fan_out); biases start at 0.
            weights = np.random.normal(0.0, 0.01, (fan_in, fan_out))
            biases = np.zeros((fan_out,))
            self.sequence.append(Dense(weights, biases, activation))

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the last output."""
        out = x
        for layer in self.sequence:
            out = layer(out)
        return out

    def calc_gradient(self, x, y, loss_func):
        """Estimate all parameter gradients with forward finite differences.

        Each weight and bias is nudged by the module-level ``epsilon`` in a
        copy of its layer, the network is re-evaluated with that copy swapped
        in, and ``(loss_nudged - loss) / epsilon`` is written into the
        layer's ``dW`` / ``db``. The stored parameters are never modified
        here. One full forward pass is performed per parameter.

        Args:
            x: Network input.
            y: Target passed to ``loss_func``.
            loss_func: Callable ``loss_func(prediction, y)`` returning a scalar.

        Returns:
            The loss of the unmodified network on ``(x, y)``.
        """

        def forward(layers):
            # Evaluate an arbitrary list of layers on the captured input x.
            out = x
            for layer in layers:
                out = layer(out)
            return out

        def loss_with_replacement(position, replacement):
            # Loss of the network in which only the layer at `position`
            # has been swapped for `replacement`.
            layers = [
                replacement if k == position else current
                for k, current in enumerate(self.sequence)
            ]
            return loss_func(forward(layers), y)

        # Reference loss that every nudged loss is compared against.
        base_loss = loss_func(self(x), y)

        for position, layer in enumerate(self.sequence):
            # Nudge one weight at a time.
            for row_idx, row in enumerate(layer.W):
                for col_idx, weight in enumerate(row):
                    nudged_W = np.copy(layer.W)  # leave the real weights intact
                    nudged_W[row_idx][col_idx] = weight + epsilon
                    candidate = Dense(nudged_W, layer.b, layer.a)
                    nudged_loss = loss_with_replacement(position, candidate)
                    # (f(w + eps) - f(w)) / eps
                    layer.dW[row_idx][col_idx] = (nudged_loss - base_loss) / epsilon

            # Nudge one bias at a time.
            for bias_idx, bias in enumerate(layer.b):
                nudged_b = np.copy(layer.b)  # leave the real biases intact
                nudged_b[bias_idx] = bias + epsilon
                candidate = Dense(layer.W, nudged_b, layer.a)
                nudged_loss = loss_with_replacement(position, candidate)
                # (f(b + eps) - f(b)) / eps
                layer.db[bias_idx] = (nudged_loss - base_loss) / epsilon

        return base_loss


## 경사하강 학습법

In [53]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    """Perform one gradient-descent update on every layer of ``network``.

    Args:
        network: Object exposing ``calc_gradient(x, y, loss_obj)`` and a
            ``sequence`` of layers that carry ``W``, ``b``, ``dW`` and ``db``.
        x: Input forwarded to ``calc_gradient``.
        y: Target forwarded to ``calc_gradient``.
        loss_obj: Loss callable forwarded to ``calc_gradient``.
        alpha: Learning rate.

    Returns:
        Whatever ``network.calc_gradient`` returned; it is obtained before
        any parameter is changed.
    """
    loss_before_step = network.calc_gradient(x, y, loss_obj)
    for layer in network.sequence:
        # In-place updates, so the layer keeps the same parameter arrays.
        layer.b -= alpha * layer.db
        layer.W -= alpha * layer.dW
    return loss_before_step

## 동작 테스트

In [54]:
# One random sample: a 10-element input and a 2-element target, both drawn
# from a standard normal distribution.
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)

# Run 100 gradient-descent steps on that single sample and time the whole loop.
t = time.time()
for epoch in range(100):
    # The returned value is the loss on (x, y) itself; it is printed under
    # the label 'Test loss' although no separate test data is involved.
    loss = gradient_descent(dnn, x, y, mean_squared_error, 0.01)
    print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))

Epoch 0: Test loss 0.30575528692745185
Epoch 1: Test loss 0.3040428991880344
Epoch 2: Test loss 0.3023399880847716
Epoch 3: Test loss 0.30064675017173326
Epoch 4: Test loss 0.2989633745634927
Epoch 5: Test loss 0.2972900428933639
Epoch 6: Test loss 0.2956269292842166
Epoch 7: Test loss 0.29397420033128385
Epoch 8: Test loss 0.29233201509685847
Epoch 9: Test loss 0.29070052511636074
Epoch 10: Test loss 0.28907987441569577
Epoch 11: Test loss 0.2874701995392075
Epoch 12: Test loss 0.28587162958816603
Epoch 13: Test loss 0.2842842862691399
Epoch 14: Test loss 0.2827082839518895
Epoch 15: Test loss 0.28114372973640317
Epoch 16: Test loss 0.27959072352858805
Epoch 17: Test loss 0.2780493581241929
Epoch 18: Test loss 0.2765197193004794
Epoch 19: Test loss 0.2750018859149988
Epoch 20: Test loss 0.27349593001149636
Epoch 21: Test loss 0.27200191693181064
Epoch 22: Test loss 0.27051990543398374
Epoch 23: Test loss 0.26904994781547964
Epoch 24: Test loss 0.26759209004158074
Epoch 25: Test loss 0

## 파라미터 수 n이 늘어나면 계산량이 $O(n^2)$으로 늘어남 (파라미터 하나마다 순전파를 한 번씩 다시 수행해야 하기 때문)

## → 역전파(back propagation)가 필요함