## 수치 미분을 이용한 심층 신경망 학습

## Import modules

In [3]:
import time
import numpy as np

## 유틸리티 함수

In [4]:
# Step size used for the forward finite-difference gradient estimates.
epsilon = 1e-4

def _t(x):
    """Return ``x`` with its axes reversed (the matrix transpose for 2-D input)."""
    transposed = np.transpose(x)
    return transposed

def _m(A, B):
    """Return the matrix product of ``A`` and ``B`` via ``np.matmul``."""
    product = np.matmul(A, B)
    return product

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed in a numerically stable way: only non-positive
    numbers are ever exponentiated, so large-magnitude inputs cannot
    overflow ``np.exp`` (the naive formula overflows for very negative ``x``).

    Args:
        x: A scalar or NumPy array.

    Returns:
        The sigmoid of ``x``, with the same shape as ``x``.
    """
    # z = exp(-|x|) always lies in (0, 1], so it can never overflow.
    z = np.exp(-np.abs(x))
    # x >= 0:  1 / (1 + exp(-x))      == 1 / (1 + z)
    # x <  0:  exp(x) / (1 + exp(x))  == z / (1 + z)
    numerator = np.where(x >= 0, 1.0, z)
    return numerator / (1.0 + z)

def mean_squared_error(h, y):
    """Return half of the mean squared difference between ``h`` and ``y``.

    Args:
        h: Predicted values.
        y: Target values; must be compatible with ``h`` under subtraction.

    Returns:
        ``0.5 * mean((h - y) ** 2)``.
    """
    residual = h - y
    return 0.5 * np.mean(np.square(residual))

## Dense Layer 구현

In [22]:
class Dense:
    """A single fully connected layer computing ``a(W^T x + b)``.

    Attributes:
        W: Weight matrix.
        b: Bias vector.
        a: Activation callable applied to the affine output.
        dW: Gradient buffer with the same shape as ``W``, initialised to zero.
        db: Gradient buffer with the same shape as ``b``, initialised to zero.
    """

    def __init__(self, W, b, a):
        """Store the parameters and allocate zero-filled gradient buffers."""
        self.W, self.b, self.a = W, b, a

        # np.zeros_like gives arrays matching the shape of the parameters.
        self.dW = np.zeros_like(W)
        self.db = np.zeros_like(b)

    def __call__(self, x):
        """Apply the layer to ``x`` and return the activated output."""
        # W is transposed so that it can be multiplied with the input x.
        pre_activation = _m(_t(self.W), x) + self.b
        return self.a(pre_activation)

## 심층신경망 구현

In [23]:
class DNN:
    """Fully connected network whose gradients are estimated numerically.

    The network is a list of ``Dense`` layers stored in ``self.sequence``:
    one input-facing hidden layer, ``hidden_depth - 1`` further hidden
    layers, and one output layer. Every layer uses the same activation.
    """

    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
        """Build the layer sequence with randomly initialised weights.

        Args:
            hidden_depth: Number of hidden layers.
            num_neuron: Width of every hidden layer.
            num_input: Size of the input vector.
            num_output: Size of the output vector.
            activation: Callable applied after every layer.
        """
        def init_var(i, o):
            # Weights are drawn from N(0, 0.01) with shape (i, o); biases are zeros of shape (o,).
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()

        # First hidden layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Dense(W, b, activation))

        # Remaining hidden layers
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Dense(W, b, activation))

        # Output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Dense(W, b, activation))

    def __call__(self, x):
        """Run ``x`` through every layer in order and return the result."""
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, x, y, loss_func):
        """Estimate the loss gradient of every parameter by forward differences.

        Each weight and bias entry is perturbed by the module-level
        ``epsilon`` in turn, and ``(f(theta + eps) - f(theta)) / eps`` is
        written into the owning layer's ``dW`` / ``db`` buffer. The layer
        parameters themselves are left untouched. This costs one forward
        pass per parameter.

        Args:
            x: Network input.
            y: Target passed to ``loss_func``.
            loss_func: Callable ``loss_func(prediction, y)`` returning the loss.

        Returns:
            The loss at the current (unperturbed) parameters.
        """
        def perturbed_loss(layer_index, new_layer):
            # Forward pass with the layer at ``layer_index`` swapped for ``new_layer``.
            out = x
            for i, layer in enumerate(self.sequence):
                out = new_layer(out) if i == layer_index else layer(out)
            return loss_func(out, y)

        # Baseline loss shared by every finite-difference quotient below.
        loss = loss_func(self(x), y)

        for layer_id, layer in enumerate(self.sequence):
            # Weights: shift one entry by epsilon, keep the bias as is.
            for idx in np.ndindex(*layer.W.shape):
                W = np.copy(layer.W)
                W[idx] += epsilon
                shifted = perturbed_loss(layer_id, Dense(W, layer.b, layer.a))
                layer.dW[idx] = (shifted - loss) / epsilon

            # Biases: shift one entry by epsilon, keep the weights at their
            # current values so the quotient measures the bias alone.
            for b_i in range(len(layer.b)):
                b = np.copy(layer.b)
                b[b_i] += epsilon
                shifted = perturbed_loss(layer_id, Dense(layer.W, b, layer.a))
                layer.db[b_i] = (shifted - loss) / epsilon

        return loss

## 경사하강 학습법

In [24]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    """Perform one gradient-descent step on ``network`` and return the loss.

    Args:
        network: Object exposing ``calc_gradient(x, y, loss_obj)`` and a
            ``sequence`` of layers with ``W``, ``b``, ``dW`` and ``db``.
        x: Network input.
        y: Target values.
        loss_obj: Loss callable forwarded to ``calc_gradient``.
        alpha: Learning rate.

    Returns:
        The loss reported by ``calc_gradient``, i.e. before the update.
    """
    current_loss = network.calc_gradient(x, y, loss_obj)
    # Move every parameter against its gradient, updating the arrays in place.
    for dense in network.sequence:
        dense.W -= alpha * dense.dW
        dense.b -= alpha * dense.db
    return current_loss

## 동작 테스트

In [25]:
# Smoke test: repeatedly fit one random (input, target) pair and print the
# loss returned by each gradient-descent step, then the total elapsed time.
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)

t = time.time()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_error, alpha=0.01)
    print(f'Epoch {epoch}: Test loss {loss}')
print(f'{time.time() - t} seconds elapsed.')

Epoch 0: Test loss 0.3426225862698866
Epoch 1: Test loss 0.34084309025406934
Epoch 2: Test loss 0.3390729684121342
Epoch 3: Test loss 0.33731230561137143
Epoch 4: Test loss 0.33556118302499705
Epoch 5: Test loss 0.3338196781478848
Epoch 6: Test loss 0.332087864815246
Epoch 7: Test loss 0.3303658132244467
Epoch 8: Test loss 0.3286535899594526
Epoch 9: Test loss 0.32695125801815317
Epoch 10: Test loss 0.3252588768422475
Epoch 11: Test loss 0.32357650234959756
Epoch 12: Test loss 0.3219041869690821
Epoch 13: Test loss 0.3202419796777387
Epoch 14: Test loss 0.31858992603998976
Epoch 15: Test loss 0.31694806824909416
Epoch 16: Test loss 0.315316445170423
Epoch 17: Test loss 0.31369509238667725
Epoch 18: Test loss 0.3120840422448429
Epoch 19: Test loss 0.3104833239047203
Epoch 20: Test loss 0.3088929633890226
Epoch 21: Test loss 0.3073129836348338
Epoch 22: Test loss 0.30574340454644827
Epoch 23: Test loss 0.30418424304929437
Epoch 24: Test loss 0.30263551314506404
Epoch 25: Test loss 0.3010