# Deep Neural Network Using Numerical Gradients
---

In [1]:
import time
import numpy as np

## Utility Functions

In [2]:
# Step size added to a single parameter when estimating a gradient by finite differences.
epsilon = 1e-4

def _t(x):
    """Return ``x`` with its axes reversed (the matrix transpose for 2-D input)."""
    transposed = np.transpose(x)
    return transposed

def _m(A, B):
    """Return the matrix product of ``A`` and ``B`` as computed by ``np.matmul``."""
    product = np.matmul(A, B)
    return product

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))``, element-wise.

    The value is computed in a numerically stable way: only ``exp`` of a
    non-positive number is ever evaluated, so large-magnitude inputs do not
    trigger floating-point overflow.

    Args:
        x: A scalar or array-like of numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the shape
        of ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) lies in (0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0:  1 / (1 + exp(-x));   x < 0:  exp(x) / (1 + exp(x)).
    numerator = np.where(x >= 0, 1.0, decay)
    result = numerator / (1.0 + decay)
    # Indexing with () unwraps a 0-d array to a scalar and leaves n-d arrays as arrays.
    return result[()]

def mean_squared_errors(h, y):
    """Return half of the mean squared difference between ``h`` and ``y``.

    Args:
        h: Array of predicted values.
        y: Array of target values, broadcast-compatible with ``h``.
    """
    residual = h - y
    squared = np.square(residual)
    return np.mean(squared) / 2

## Neuron

In [3]:
class Neuron:
    """A fully connected layer: weights ``W``, bias ``b`` and activation ``a``.

    ``dW`` and ``db`` are zero-initialised buffers with the shapes of ``W``
    and ``b``; they hold the most recently computed gradients.
    """

    def __init__(self, W, b, a):
        self.W, self.b, self.a = W, b, a

        # Gradient buffers, same shape and dtype as the parameters.
        self.dW = np.zeros_like(W)
        self.db = np.zeros_like(b)

    def __call__(self, x):
        """Return ``a(W^T x + b)`` for the input ``x``."""
        pre_activation = _m(_t(self.W), x) + self.b
        return self.a(pre_activation)

## Deep Neural Network Structure

In [4]:
class DNN:
    """Fully connected feed-forward network with numerically estimated gradients.

    The layers are ``Neuron`` objects stored in ``self.sequence``: one
    input-to-hidden layer, ``hidden_depth - 1`` hidden-to-hidden layers and
    one hidden-to-output layer. Every layer, including the output layer,
    applies the same ``activation``.
    """

    def __init__(self, hidden_depth, num_neuron, num_inputs, num_outputs, activation = sigmoid):
        """Build the layer sequence with randomly initialised weights.

        Args:
            hidden_depth: Number of hidden layers.
            num_neuron: Width of every hidden layer.
            num_inputs: Length of the input vector.
            num_outputs: Length of the output vector.
            activation: Callable applied to each layer's pre-activation.
        """
        def init_var(i, o):
            # Weights ~ N(0, 0.01) with shape (i, o); bias is zeros with shape (o,).
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = []

        # First hidden layer: num_inputs -> num_neuron.
        W, b = init_var(num_inputs, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Remaining hidden layers: num_neuron -> num_neuron.
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # Output layer: num_neuron -> num_outputs.
        W, b = init_var(num_neuron, num_outputs)
        self.sequence.append(Neuron(W, b, activation))

    def __call__(self, x):
        """Run a forward pass: feed ``x`` through every layer in order."""
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, x, y, loss_func):
        """Estimate the loss gradient for every parameter by forward differences.

        Each weight and bias entry is increased by the module-level
        ``epsilon`` in turn, the loss is re-evaluated, and
        ``(perturbed_loss - loss) / epsilon`` is written to the matching
        entry of the owning layer's ``dW`` / ``db``. This costs one full
        forward pass per parameter.

        Args:
            x: Network input.
            y: Target passed to ``loss_func``.
            loss_func: Callable ``loss_func(prediction, y)`` returning a scalar.

        Returns:
            The loss at the current, unperturbed parameters.
        """
        loss = loss_func(self(x), y)  # 'self(x)' is shorthand for 'self.__call__(x)'.

        for layer in self.sequence:
            for params, grads in ((layer.W, layer.dW), (layer.b, layer.db)):
                for idx in np.ndindex(params.shape):
                    original = params[idx]
                    # Perturb one entry in place rather than copying the whole array.
                    params[idx] = original + epsilon
                    try:
                        perturbed_loss = loss_func(self(x), y)
                    finally:
                        # Always restore the entry so a failing loss_func
                        # cannot leave the network with a perturbed parameter.
                        params[idx] = original
                    grads[idx] = (perturbed_loss - loss) / epsilon
        return loss

## Gradient Descent Training

In [5]:
def gradient_descent(network, x, y, loss_obj, alpha = 0.01): # alpha: learning rate
    """Perform one gradient-descent update on ``network`` and return the loss.

    ``network.calc_gradient`` fills each layer's ``dW`` / ``db``; every layer's
    ``W`` and ``b`` are then moved in place by ``alpha`` times the gradient,
    in the descent direction. The returned loss is the value reported by
    ``calc_gradient``, i.e. it is obtained before the parameters are updated.
    """
    current_loss = network.calc_gradient(x, y, loss_obj)
    for unit in network.sequence:
        unit.W -= alpha * unit.dW
        unit.b -= alpha * unit.db
    return current_loss

## Training

In [6]:
# A single random training sample: 10 input features and 2 target values.
x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

# NOTE(review): with a sigmoid output layer the predictions lie in (0, 1),
# while y is drawn from N(0, 1) and may fall outside that range.
dnn = DNN(hidden_depth = 5, num_neuron = 32, num_inputs = 10, num_outputs = 2, activation = sigmoid)

# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval is not affected by system clock adjustments.
t = time.perf_counter()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_errors, 0.01)
    # The printed value is the loss on (x, y) measured before this epoch's update.
    print(f'Epoch {epoch + 1}, Test Loss: {loss}.')
print(f'{time.perf_counter() - t} seconds elapsed.')

Epoch 1, Test Loss: 0.7508902553268118.
Epoch 2, Test Loss: 0.7466796412877036.
Epoch 3, Test Loss: 0.7424943554657073.
Epoch 4, Test Loss: 0.7383352805834973.
Epoch 5, Test Loss: 0.7342032641579066.
Epoch 6, Test Loss: 0.7300991177932463.
Epoch 7, Test Loss: 0.7260236165939724.
Epoch 8, Test Loss: 0.7219774986946541.
Epoch 9, Test Loss: 0.717961464905691.
Epoch 10, Test Loss: 0.7139761784716311.
Epoch 11, Test Loss: 0.7100222649396308.
Epoch 12, Test Loss: 0.7061003121342779.
Epoch 13, Test Loss: 0.7022108702345.
Epoch 14, Test Loss: 0.6983544519484891.
Epoch 15, Test Loss: 0.6945315327820956.
Epoch 16, Test Loss: 0.6907425513947959.
Epoch 17, Test Loss: 0.6869879100390994.
Epoch 18, Test Loss: 0.6832679750773109.
Epoch 19, Test Loss: 0.6795830775698748.
Epoch 20, Test Loss: 0.675933513930373.
Epoch 21, Test Loss: 0.6723195466410367.
Epoch 22, Test Loss: 0.6687414050231084.
Epoch 23, Test Loss: 0.6651992860564389.
Epoch 24, Test Loss: 0.661693355242805.
Epoch 25, Test Loss: 0.65822374