In [1]:
import numpy

# Layer-by-layer description of the network, in forward order. Each entry gives
# the layer's input width, output width, and the name of its activation.
# NOTE(review): `x` is not defined in any visible cell — presumably the feature
# matrix with features along axis 0; confirm it exists before this cell runs.
nn_architecture = [
    {"input_dim": x.shape[0], "output_dim": 10, "activation": "relu"},
    {"input_dim": 10, "output_dim": 1, "activation": "sigmoid"},
]

def init_layers(nn_architecture, seed=2019):
    """Build the initial weight and bias arrays for every layer.

    Parameters
    ----------
    nn_architecture : iterable of dict
        Layer descriptions; each must provide 'input_dim' and 'output_dim'.
    seed : int, default 2019
        Value handed to ``np.random.seed``. Note that this reseeds NumPy's
        global random state as a side effect.

    Returns
    -------
    dict
        For layer k (1-based): ``'W_k'`` of shape (output_dim, input_dim),
        standard-normal draws scaled by 0.1, and ``'b_k'`` of shape
        (output_dim, 1), unscaled standard-normal draws.
    """
    np.random.seed(seed)
    params_values = {}

    for layer_number, spec in enumerate(nn_architecture, start=1):
        n_in = spec['input_dim']
        n_out = spec['output_dim']

        # Draw order (weights, then biases) determines the values for a given seed.
        weights = np.random.randn(n_out, n_in) * 0.1
        # Biases are drawn randomly rather than zero-initialised.
        biases = np.random.randn(n_out, 1)

        params_values[f'W_{layer_number}'] = weights
        params_values[f'b_{layer_number}'] = biases

    return params_values

def CrossEntropyLoss(y_hat, y):
    """Mean binary cross-entropy between predicted probabilities and 0/1 labels.

    Both inputs are flattened before comparison, so predictions shaped (1, m)
    may be paired with labels shaped (m,), (1, m) or (m, 1).

    Parameters
    ----------
    y_hat : array-like
        Predicted probabilities, one per sample.
    y : array-like
        Ground-truth labels; every value must be 0 or 1.

    Returns
    -------
    float
        ``-(1/m) * sum(y*log(y_hat) + (1-y)*log(1-y_hat))`` where m is the
        number of samples. A small epsilon is added inside each log so that
        probabilities of exactly 0 or 1 do not produce ``-inf``.

    Raises
    ------
    AssertionError
        If the inputs are empty, differ in number of elements, or the labels
        contain anything other than 0 and 1.
    """
    y_hat = np.asarray(y_hat, dtype=float).ravel()
    y = np.asarray(y).ravel()

    # Normalise by the sample count. The previous code measured the length
    # after adding a leading axis, which is always 1, so the "mean" was a sum.
    m = y.size
    assert m > 0, 'CrossEntropyLoss needs at least one sample'
    assert y_hat.size == m, 'y_hat and y must contain the same number of samples'
    assert np.all(np.isin(np.unique(y), [0, 1])), 'labels must be 0 or 1'

    eps = 1e-15
    # Each sample contributes log(1 - p) when its label is 0 and log(p) when it is 1.
    negatives = np.sum(np.log(1 - y_hat[y == 0] + eps))
    positives = np.sum(np.log(y_hat[y == 1] + eps))
    return -(negatives + positives) / m

def get_accuracy_value(y_hat, y, threshold=0.5):
    """Fraction of predictions that match the labels after thresholding.

    Parameters
    ----------
    y_hat : array-like
        Predicted scores; a score is counted as the positive class when it is
        greater than or equal to ``threshold``.
    y : array-like
        Reference labels, compared element-wise (with NumPy broadcasting)
        against the thresholded predictions.
    threshold : float, default 0.5
        Decision boundary applied to ``y_hat``.

    Returns
    -------
    float
        Mean of the element-wise equality between thresholded predictions and labels.
    """
    predicted_positive = np.array(y_hat) >= threshold
    labels = np.array(y)
    matches = predicted_positive == labels
    return np.mean(matches)


def sigmoid(Z):
    """Element-wise logistic function ``1 / (1 + exp(-Z))``."""
    exp_of_negated = np.exp(-Z)
    return 1/(1 + exp_of_negated)

def ReLU(Z):
    """Element-wise rectified linear unit: the larger of 0 and each entry of ``Z``."""
    lower_bound = 0
    return np.maximum(lower_bound, Z)

def sigmoid_backward(dA, Z):
    """Backpropagate a gradient through the sigmoid activation.

    Multiplies the upstream gradient ``dA`` by the sigmoid derivative
    ``sigmoid(Z) * (1 - sigmoid(Z))`` evaluated at the pre-activation ``Z``.
    """
    activation = sigmoid(Z)
    scaled = dA * activation
    return scaled * (1 - activation)

def ReLU_backward(dA, Z):
    """Backpropagate a gradient through the ReLU activation.

    The ReLU derivative is 1 where the pre-activation is positive and 0
    elsewhere, so the upstream gradient passes through unchanged for active
    units and is zeroed for inactive ones.

    Parameters
    ----------
    dA : array-like
        Gradient of the loss with respect to the ReLU output.
    Z : array-like
        Pre-activation values that were fed to ReLU in the forward pass;
        must have the same shape as ``dA``.

    Returns
    -------
    numpy.ndarray
        Gradient with respect to ``Z``. ``dA`` itself is not modified.
    """
    dZ = np.array(dA)  # np.array copies, so the caller's gradient is left untouched
    # Mask on the forward input Z, not on the sign of the incoming gradient.
    dZ[np.asarray(Z) <= 0] = 0
    return dZ

def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation = 'relu'):
    """Run one dense layer forward: affine transform followed by an activation.

    Parameters
    ----------
    A_prev : numpy.ndarray
        Activations coming from the previous layer (or the network input).
    W_curr : numpy.ndarray
        Weight matrix of this layer; ``W_curr @ A_prev`` must be defined.
    b_curr : numpy.ndarray
        Bias added to ``W_curr @ A_prev`` (broadcast by NumPy).
    activation : {'relu', 'sigmoid'}, default 'relu'
        Name of the activation applied to the affine output.

    Returns
    -------
    tuple
        ``(A_next, Z_curr)``: the activated output and the pre-activation values.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Validate first so an unknown name fails loudly; previously the exception
    # object was *returned*, which only surfaced later as an unpacking error.
    if activation == 'relu':
        activation_func = ReLU
    elif activation == 'sigmoid':
        activation_func = sigmoid
    else:
        raise ValueError('Non-supported activation function')

    Z_curr = W_curr @ A_prev + b_curr
    A_next = activation_func(Z_curr)
    return A_next, Z_curr

def fully_forward_propagation(X, params_values, nn_architecture):
    """Push ``X`` through every layer and record what backpropagation needs.

    Parameters
    ----------
    X : numpy.ndarray
        Network input, used as the activation feeding the first layer.
    params_values : dict
        Holds ``'W_k'`` and ``'b_k'`` for each layer k (1-based).
    nn_architecture : iterable of dict
        Layer descriptions; each must provide 'activation'.

    Returns
    -------
    tuple
        ``(output, memory)`` where ``output`` is the last layer's activation and
        ``memory`` maps ``'A_{k-1}'`` to the input of layer k and ``'Z_k'`` to
        its pre-activation values.
    """
    memory = {}
    signal = X

    for layer_number, spec in enumerate(nn_architecture, start=1):
        layer_input = signal
        activation_name = spec['activation']
        weights = params_values[f'W_{layer_number}']
        bias = params_values[f'b_{layer_number}']

        signal, pre_activation = single_layer_forward_propagation(
            layer_input, weights, bias, activation=activation_name
        )

        # The input of layer k is stored under the previous layer's index.
        memory[f'A_{layer_number - 1}'] = layer_input
        memory[f'Z_{layer_number}'] = pre_activation

    return signal, memory


def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation = 'relu'):
    """Backpropagate through one dense layer.

    Parameters
    ----------
    dA_curr : numpy.ndarray
        Gradient of the loss with respect to this layer's activated output.
    W_curr : numpy.ndarray
        Weight matrix of this layer.
    b_curr : numpy.ndarray
        Bias of this layer. Not used in the computation; kept so the
        signature stays stable for existing callers.
    Z_curr : numpy.ndarray
        Pre-activation values saved during the forward pass.
    A_prev : numpy.ndarray
        Input that was fed to this layer; its second axis length is used as
        the number of samples when averaging the gradients.
    activation : {'relu', 'sigmoid'}, default 'relu'
        Name of the activation used by this layer.

    Returns
    -------
    tuple
        ``(dA_prev, dW_curr, db_curr)``: gradient with respect to the layer
        input, and sample-averaged gradients for the weights and the bias.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Raise (rather than return) on an unknown activation so the failure is
    # reported at its source instead of as a later tuple-unpacking error.
    if activation == 'relu':
        backward_activation_func = ReLU_backward
    elif activation == 'sigmoid':
        backward_activation_func = sigmoid_backward
    else:
        raise ValueError('Non-supported activation function')

    m = A_prev.shape[1]

    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T)/m
    # keepdims keeps the bias gradient as a column so it matches b_curr's layout.
    db_curr = np.sum(dZ_curr, axis=1, keepdims = True)/m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr


def fully_backward_propagation(y_hat, y, memory, params_values, nn_architecture):
    """Compute weight and bias gradients for every layer, last layer first.

    Parameters
    ----------
    y_hat : array-like
        Network output from the forward pass.
    y : array-like
        Target labels; combined with ``y_hat`` using NumPy broadcasting.
    memory : dict
        Forward-pass cache with ``'A_{k-1}'`` and ``'Z_k'`` for each layer k.
    params_values : dict
        Current ``'W_k'`` and ``'b_k'`` for each layer k.
    nn_architecture : iterable of dict
        Layer descriptions; each must provide 'activation'.

    Returns
    -------
    dict
        ``'dW_k'`` and ``'db_k'`` for each layer k.
    """
    predictions = np.array(y_hat)
    targets = np.array(y)
    grads_values = {}

    # Derivative of binary cross-entropy with respect to the predictions; the
    # small constants keep the denominators away from zero.
    upstream = -targets/(predictions+1e-15)+(1-targets)/(1-predictions+1e-15)

    indexed_layers = list(enumerate(nn_architecture))
    for prev_number, spec in indexed_layers[::-1]:
        curr_number = prev_number + 1
        activation_name = spec['activation']

        layer_input = memory[f'A_{prev_number}']
        pre_activation = memory[f'Z_{curr_number}']
        weights = params_values[f'W_{curr_number}']
        bias = params_values[f'b_{curr_number}']

        # The gradient returned for the layer input becomes the upstream
        # gradient of the next (earlier) layer.
        upstream, weight_grad, bias_grad = single_layer_backward_propagation(
            upstream, weights, bias, pre_activation, layer_input, activation_name
        )

        grads_values[f'dW_{curr_number}'] = weight_grad
        grads_values[f'db_{curr_number}'] = bias_grad

    return grads_values

def updates(params_values, grads_values, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    Parameters
    ----------
    params_values : dict
        ``'W_k'`` and ``'b_k'`` arrays for each layer k (1-based). The arrays
        are updated in place.
    grads_values : dict
        ``'dW_k'`` and ``'db_k'`` gradients matching ``params_values``.
    nn_architecture : iterable of dict
        Layer descriptions; only the number of layers is used here.
    learning_rate : float
        Step size multiplied into each gradient.

    Returns
    -------
    dict
        The same ``params_values`` object, after the update.
    """
    # enumerate supplies the layer index; iterating the list directly tried to
    # unpack each layer dict into (idx, layer) and raised ValueError.
    for layer_idx, _ in enumerate(nn_architecture, start=1):
        params_values[f'W_{layer_idx}'] -= learning_rate * grads_values[f'dW_{layer_idx}']
        params_values[f'b_{layer_idx}'] -= learning_rate * grads_values[f'db_{layer_idx}']
    return params_values


def fit(X, Y, nn_architecture, epochs, learning_rate, seed = 2019):
    """Train the network with full-batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Training inputs fed to the first layer.
        # assumes shape (n_features, n_samples) — TODO confirm against caller
    Y : array-like
        Binary (0/1) training labels.
    nn_architecture : list of dict
        Layer descriptions with 'input_dim', 'output_dim' and 'activation'.
    epochs : int
        Number of forward/backward/update passes over the whole data set.
    learning_rate : float
        Step size for the parameter update.
    seed : int, default 2019
        Seed forwarded to ``init_layers`` for parameter initialisation.

    Returns
    -------
    tuple
        ``(params_values, cost_history, accuracy_history, grads_history)``:
        the trained parameters and, per epoch, the loss, the accuracy and the
        gradient dictionary. Loss and accuracy are measured before that
        epoch's update is applied.
    """
    params_values = init_layers(nn_architecture, seed = seed)
    cost_history = []
    accuracy_history = []
    grads_history = []

    for _ in range(epochs):
        y_hat, cache = fully_forward_propagation(X, params_values, nn_architecture)

        cost_history.append(CrossEntropyLoss(y_hat, Y))
        accuracy_history.append(get_accuracy_value(y_hat, Y))

        grads_values = fully_backward_propagation(y_hat, Y, cache, params_values, nn_architecture)
        params_values = updates(params_values, grads_values, nn_architecture, learning_rate)

        grads_history.append(grads_values)

    # Return the trained parameters; the previous code referenced the
    # undefined name `params` and raised NameError after training finished.
    return params_values, cost_history, accuracy_history, grads_history



In [2]:
import numpy as np

In [None]:
# Scratch expression: builds an empty NumPy array; the result is not assigned to a name.
np.array([])