## Import Modules

In [1]:
import time
import numpy as np

## Utility Functions

In [2]:
def _t(x):
    """Return ``x`` with its axes reversed (the transpose for 2-D input).

    Shorthand for ``np.transpose`` with default axes.
    """
    transposed = np.transpose(x)
    return transposed

def _m(A, B):
    """Return the matrix product of ``A`` and ``B``.

    Shorthand for ``np.matmul``.
    """
    product = np.matmul(A, B)
    return product

## Implement Sigmoid

In [3]:
class Sigmoid:
    """Logistic sigmoid activation that caches its most recent output.

    The cached output lets ``grad`` evaluate the derivative without
    repeating the forward computation.
    """

    def __init__(self):
        # Initial value of the cached output, used until the first forward
        # call. With 1, ``grad`` evaluates to 0.
        self.last_o = 1

    def __call__(self, x):
        """Apply the sigmoid to ``x`` and remember the result for ``grad``."""
        denominator = 1.0 + np.exp(-x)
        activated = 1.0 / denominator
        # Keep the output around: back-propagation needs it.
        self.last_o = activated
        return activated

    def grad(self):
        """Return ``o * (1 - o)`` where ``o`` is the most recent output."""
        output = self.last_o
        return output * (1.0 - output)

## Implement Mean Squared Error

In [4]:
class MeanSquaredError:
    """Half mean-squared-error loss that caches the residual ``h - y``."""

    def __init__(self):
        # Slot for the loss gradient; DNN.calc_gradient overwrites it with
        # the value of grad().
        self.dh = 1
        # Residual h - y from the most recent call.
        self.last_diff = 1

    def __call__(self, h, y):
        """Return ``1/2 * mean((h - y)^2)`` and remember ``h - y``."""
        residual = h - y
        self.last_diff = residual
        return 0.5 * np.mean(np.square(residual))

    def grad(self):
        """Return the cached residual ``h - y``.

        This is the element-wise derivative of ``1/2 * (h - y)^2``.
        NOTE: the 1/N factor implied by ``np.mean`` in ``__call__`` is not
        applied here.
        """
        return self.last_diff

In [5]:
class Dense:
    """Fully connected layer computing ``a(W^T x + b)`` for one input vector.

    Args:
        W: Weight matrix of shape ``(num_inputs, num_outputs)``.
        b: Bias vector of shape ``(num_outputs,)``.
        a_obj: Activation *class* (not an instance). It is instantiated here
            so that every layer owns its own activation object and therefore
            its own cached last output.

    Attributes:
        dW, db: Gradient buffers with the shapes of ``W`` and ``b``.
        dh: Buffer for the gradient propagated to the previous layer.
        last_x, last_h: Input and pre-activation of the latest forward call.
    """

    def __init__(self, W, b, a_obj):
        self.W = W
        self.b = b
        self.a = a_obj()  # one activation instance per layer

        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W))

        self.last_x = np.zeros((self.W.shape[0]))
        self.last_h = np.zeros((self.W.shape[1]))

    def __call__(self, x):
        """Run the forward pass for input ``x`` and cache ``x`` and ``W^T x + b``."""
        self.last_x = x
        self.last_h = _m(_t(self.W), x) + self.b
        return self.a(self.last_h)

    def grad(self):
        """Return ``W`` with column ``j`` scaled by the activation derivative of output ``j``.

        Multiplying the result by the upstream gradient (see
        ``DNN.calc_gradient``) yields the gradient with respect to this
        layer's input.
        """
        return self.W * self.a.grad()

    def grad_W(self, dh):
        """Return the gradient of the loss with respect to ``W``.

        Args:
            dh: Upstream gradient, one entry per output neuron.

        Entry ``[i, j]`` equals ``last_x[i] * dh[j] * a'(h_j)``, which is the
        outer product of the cached input with the per-neuron delta. The
        result is a new array whose dtype follows the operands.
        """
        delta = dh * self.a.grad()
        return np.outer(self.last_x, delta)

    def grad_b(self, dh):
        """Return the gradient of the loss with respect to ``b`` (d h / d b = 1)."""
        return dh * self.a.grad()

## DNN with Back Propagation

In [6]:
class DNN:
    """Stack of ``Dense`` layers trained with back-propagation.

    Args:
        hidden_depth: Number of ``num_neuron -> num_neuron`` layers added
            after the first hidden layer (so the network has
            ``hidden_depth + 1`` hidden layers plus the output layer).
        num_neuron: Width of every hidden layer.
        num_input: Size of the input vector.
        num_output: Size of the output vector.
        activation: Activation class passed to every layer, including the
            output layer.
    """

    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=Sigmoid):
        def init_var(i, o):
            """Return initial ``(W, b)`` for a layer with ``i`` inputs and ``o`` outputs."""
            # Weights ~ N(0, 0.01), biases start at zero.
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()

        # First hidden layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Dense(W, b, activation))

        # Remaining hidden layers
        for _ in range(hidden_depth):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Dense(W, b, activation))

        # Output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Dense(W, b, activation))

    def __call__(self, x):
        """Feed ``x`` forward through every layer in order and return the output."""
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, loss_obj):
        """Back-propagate from ``loss_obj`` and store ``dh``, ``dW``, ``db`` on each layer.

        Must be called after a forward pass and a loss evaluation, because it
        relies on the values cached by the layers and by ``loss_obj``.

        ``self.sequence`` is never modified here, so an exception raised
        while computing gradients cannot leave the loss object stuck in the
        layer list.
        """
        loss_obj.dh = loss_obj.grad()
        upstream = loss_obj.dh  # gradient arriving from the layer "above"

        # Back-propagation loop: walk the layers from output to input.
        for layer in reversed(self.sequence):
            layer.dh = _m(layer.grad(), upstream)
            layer.dW = layer.grad_W(upstream)
            layer.db = layer.grad_b(upstream)
            upstream = layer.dh

## Gradient Descent

In [7]:
def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    """Perform one gradient-descent step on ``network`` for the sample ``(x, y)``.

    Args:
        network: Callable model exposing ``calc_gradient(loss_obj)`` and a
            ``sequence`` of layers with ``W``, ``b``, ``dW`` and ``db``.
        x: Network input.
        y: Target passed to ``loss_obj``.
        loss_obj: Callable ``loss_obj(prediction, y)`` returning the loss.
        alpha: Learning rate.

    Returns:
        The loss computed before the parameters were updated.
    """
    # Forward inference
    prediction = network(x)
    loss = loss_obj(prediction, y)

    # Back-propagation fills dW / db on every layer
    network.calc_gradient(loss_obj)

    # Step each parameter against its gradient
    for layer in network.sequence:
        layer.W -= alpha * layer.dW
        layer.b -= alpha * layer.db
    return loss

## Test

In [8]:
# Smoke test: fit the network to a single random sample and report the loss.
# One random input vector (10 features) and one random target (2 values).
x = np.random.normal(0.0, 1.0, (10, ))
y = np.random.normal(0.0, 1.0, (2, ))

t = time.time()  # wall-clock start; covers network construction and training
dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=Sigmoid)
loss_obj = MeanSquaredError()
# 100 gradient-descent steps on the same (x, y) pair with learning rate 0.01.
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, loss_obj, 0.01)
    if epoch % 10 == 1:  # report at epochs 1, 11, 21, ...
        print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))

Epoch 1: Test loss 0.9140658861478284
Epoch 11: Test loss 0.818844318331263
Epoch 21: Test loss 0.7424649182629921
Epoch 31: Test loss 0.684183665512473
Epoch 41: Test loss 0.6401939924390871
Epoch 51: Test loss 0.6066766346177853
Epoch 61: Test loss 0.5806961736127831
Epoch 71: Test loss 0.5601757887738992
Epoch 81: Test loss 0.5436742871621133
Epoch 91: Test loss 0.5301860918038411
0.12703275680541992 seconds elapsed.
