## 수치 미분을 이용한 심층 신경망 학습

In [1]:
import time
import numpy as np

## 유틸리티 함수

In [2]:
epsilon = 1e-4  # finite-difference step size used for numerical differentiation

def _t(x):
    """Return ``x`` with its axes reversed (rows and columns swapped for 2-D input)."""
    transposed = np.transpose(x)
    return transposed

def _m(A, B):
    """Return the matrix product of ``A`` and ``B`` via ``np.matmul``."""
    product = np.matmul(A, B)
    return product

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def mean_squared_error(h, y):
    """Return half of the mean squared difference between ``h`` and ``y``.

    Args:
        h: Predicted values.
        y: Target values; ``h - y`` must be a valid (broadcastable) subtraction.

    Returns:
        ``0.5 * mean((h - y) ** 2)`` as a scalar.
    """
    residual = h - y
    return 0.5 * np.mean(np.square(residual))

## 뉴런 구현

In [3]:
class Neuron:
    """One fully connected layer that evaluates ``a(W^T x + b)``.

    Attributes:
        W: Weight matrix.
        b: Bias vector.
        a: Activation callable applied to the affine output.
        dW: Gradient buffer with the same shape as ``W``, initialised to zeros.
        db: Gradient buffer with the same shape as ``b``, initialised to zeros.
    """

    def __init__(self, W, b, a):
        # Model parameters (stored by reference, not copied).
        self.W, self.b, self.a = W, b, a

        # Gradient buffers: zero arrays shaped like their parameters.
        self.dW = np.zeros_like(W)
        self.db = np.zeros_like(b)

    def __call__(self, x):
        """Return the layer output ``activation(W^T x + b)`` for input ``x``."""
        pre_activation = _m(_t(self.W), x) + self.b
        return self.a(pre_activation)

## 심층신경망 구현

In [4]:
class DNN:
    """Feed-forward stack of ``Neuron`` layers trained by numerical differentiation.

    Attributes:
        sequence: List of ``Neuron`` layers applied in order by ``__call__``.
    """

    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
        """Build the layer stack.

        Args:
            hidden_depth: Number of hidden layers. The first hidden layer is
                always created, so values below 1 still yield one hidden layer.
            num_neuron: Number of neurons in each hidden layer.
            num_input: Number of input features.
            num_output: Number of output neurons.
            activation: Activation callable used by every layer, including
                the output layer.
        """
        def init_var(i, o):
            # Weights are drawn from N(mean=0, std=0.01) with shape (i, o);
            # biases start at zero with shape (o,).
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()

        # First hidden layer: num_input -> num_neuron.
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Remaining hidden layers; the first one was created above, hence -1.
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # Output layer: num_neuron -> num_output.
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))

    def __call__(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, x, y, loss_func):
        """Fill every layer's ``dW`` / ``db`` with forward-difference gradients.

        Each parameter is perturbed by ``epsilon`` in turn and the gradient is
        estimated as ``(loss(p + epsilon) - loss(p)) / epsilon``. The model
        parameters themselves are not modified; only ``dW`` and ``db`` are
        written.

        Args:
            x: Network input, passed to the first layer.
            y: Target passed to ``loss_func`` together with the prediction.
            loss_func: Callable ``loss_func(prediction, y)`` returning a scalar.

        Returns:
            The unperturbed loss ``loss_func(self(x), y)``.
        """
        def loss_with_layer(layer_index, replacement):
            # Forward pass with one layer swapped for `replacement`;
            # self.sequence itself is left untouched.
            h = x
            for i, layer in enumerate(self.sequence):
                h = (replacement if i == layer_index else layer)(h)
            return loss_func(h, y)

        loss = loss_func(self(x), y)

        for layer_id, layer in enumerate(self.sequence):
            # Weight gradients: perturb one entry of W at a time.
            for w_i, row in enumerate(layer.W):
                for w_j, ww in enumerate(row):
                    W = np.copy(layer.W)
                    W[w_i][w_j] = ww + epsilon
                    perturbed = Neuron(W, layer.b, layer.a)
                    # (f(x + eps) - f(x)) / eps
                    layer.dW[w_i][w_j] = (loss_with_layer(layer_id, perturbed) - loss) / epsilon

            # Bias gradients: computed once per layer. This loop must sit at
            # layer level, not inside the row loop above, otherwise every bias
            # gradient is recomputed once for each row of W.
            for b_i, bb in enumerate(layer.b):
                b = np.copy(layer.b)
                b[b_i] = bb + epsilon
                perturbed = Neuron(layer.W, b, layer.a)
                # (f(x + eps) - f(x)) / eps
                layer.db[b_i] = (loss_with_layer(layer_id, perturbed) - loss) / epsilon

        return loss

## 경사하강 학습법

In [5]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    """Perform one gradient-descent step on ``network`` and return the loss.

    Args:
        network: Object exposing ``calc_gradient(x, y, loss_obj)`` and a
            ``sequence`` of layers with ``W``, ``b``, ``dW`` and ``db``.
        x: Input forwarded to ``calc_gradient``.
        y: Target forwarded to ``calc_gradient``.
        loss_obj: Loss callable forwarded to ``calc_gradient``.
        alpha: Learning rate.

    Returns:
        The loss value returned by ``network.calc_gradient``.
    """
    current_loss = network.calc_gradient(x, y, loss_obj)
    for neuron in network.sequence:
        # In-place updates so existing references to W / b see the new values.
        neuron.W -= alpha * neuron.dW
        neuron.b -= alpha * neuron.db
    return current_loss

## 동작 테스트

In [6]:
# Single random training sample: 10 input features, 2 target values.
x = np.random.normal(loc=0.0, scale=1.0, size=(10,))
y = np.random.normal(loc=0.0, scale=1.0, size=(2,))

dnn = DNN(
    hidden_depth=5,
    num_neuron=32,
    num_input=10,
    num_output=2,
    activation=sigmoid,
)

# Run 100 gradient-descent steps on the same sample, timing the whole loop.
t = time.time()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_error, alpha=0.01)
    print('Epoch {}: Test loss {}'.format(epoch, loss))
elapsed = time.time() - t
print('{} seconds elapsed.'.format(elapsed))

Epoch 0: Test loss 1.1381219227907302
Epoch 1: Test loss 1.1317422379065443
Epoch 2: Test loss 1.125400422034505
Epoch 3: Test loss 1.119098113716565
Epoch 4: Test loss 1.112836888032469
Epoch 5: Test loss 1.106618254981134
Epoch 6: Test loss 1.1004436581090655
Epoch 7: Test loss 1.0943144733847712
Epoch 8: Test loss 1.0882320083160224
Epoch 9: Test loss 1.08219750130502
Epoch 10: Test loss 1.0762121212363769
Epoch 11: Test loss 1.070276967289371
Epoch 12: Test loss 1.0643930689672945
Epoch 13: Test loss 1.0585613863341163
Epoch 14: Test loss 1.052782810447565
Epoch 15: Test loss 1.047058163979899
Epoch 16: Test loss 1.0413882020122984
Epoch 17: Test loss 1.035773612992966
Epoch 18: Test loss 1.0302150198456346
Epoch 19: Test loss 1.0247129812166065
Epoch 20: Test loss 1.0192679928479966
Epoch 21: Test loss 1.0138804890634803
Epoch 22: Test loss 1.0085508443557187
Epoch 23: Test loss 1.0032793750626863
Epoch 24: Test loss 0.998066341120421
Epoch 25: Test loss 0.9929119478814787
Epoch 2