In [1]:
import numpy as np

In [None]:
import numpy

# Network layout, one dict per layer in forward order. Each entry gives the
# layer's input width, its output width and the name of its activation
# ("relu" or "sigmoid" are the two names the propagation helpers accept).
# NOTE(review): `x` is not defined in the visible code; presumably it is the
# feature matrix with features along axis 0 -- confirm against the caller.
nn_architecture = [
    {"input_dim": x.shape[0], "output_dim": 10, "activation": "relu"},
    {"input_dim": 10, "output_dim": 1, "activation": "sigmoid"},
]


def init_layers(nn_architecture, seed):
    """Create the initial weights and biases for every layer.

    Seeds NumPy's global random generator with ``seed`` and then walks the
    layers in order (numbered from 1). For each layer it draws an
    ``(output_dim, input_dim)`` weight matrix from a standard normal scaled
    by 0.1 and creates an ``(output_dim, 1)`` bias column of zeros.

    Args:
        nn_architecture: Iterable of layer dicts holding ``'input_dim'`` and
            ``'output_dim'``.
        seed: Value passed to ``np.random.seed``.

    Returns:
        Dict mapping ``'W_<n>'`` and ``'b_<n>'`` to the new arrays.
    """
    np.random.seed(seed)

    params_values = {}
    for layer_no, spec in enumerate(nn_architecture, start=1):
        n_out, n_in = spec['output_dim'], spec['input_dim']
        # Small random weights; biases start at zero.
        params_values[f'W_{layer_no}'] = 0.1 * np.random.randn(n_out, n_in)
        params_values[f'b_{layer_no}'] = np.zeros((n_out, 1))
    return params_values


def sigmoid(Z):
    """Apply the logistic function ``1 / (1 + exp(-Z))`` element-wise."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator


def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``."""
    return np.maximum(0, Z)


def sigmoid_backward(dA_curr, Z_curr):
    """Back-propagate a gradient through the sigmoid activation.

    Args:
        dA_curr: Gradient of the loss with respect to the sigmoid output.
        Z_curr: Pre-activation values the sigmoid was applied to.

    Returns:
        Gradient of the loss with respect to ``Z_curr``, i.e.
        ``dA_curr * s * (1 - s)`` with ``s = sigmoid(Z_curr)``.
    """
    A_curr = sigmoid(Z_curr)
    # Chain rule: d(sigmoid)/dZ = s * (1 - s). The result must be returned
    # so the caller can compute the weight and bias gradients from it.
    return dA_curr * A_curr * (1 - A_curr)
    
    
def ReLU_backward(dA_curr, Z_curr):
    """Back-propagate a gradient through the ReLU activation.

    Works on a copy of ``dA_curr`` and zeroes every entry whose matching
    pre-activation in ``Z_curr`` is strictly negative; all other entries
    pass through unchanged.
    """
    grad = np.array(dA_curr)  # copy, so the caller's array is not modified
    inactive = Z_curr < 0
    grad[inactive] = 0
    return grad

def CrossEntropyLoss(y_hat, y):
    """Mean binary cross-entropy between predictions and 0/1 labels.

    Args:
        y_hat: Predicted probabilities. It is indexed with boolean masks
            built from ``y[None]``, so for 1-D labels of length m it is
            expected to have shape ``(1, m)``.
        y: Labels; every value must be 0 or 1.

    Returns:
        The average over all labels of
        ``-[y*log(y_hat) + (1-y)*log(1-y_hat)]``, with ``1e-15`` added inside
        each logarithm to avoid ``log(0)``.

    Raises:
        AssertionError: If the leading dimensions of ``y[None]`` and
            ``y_hat`` differ, or if ``y`` holds a value other than 0 or 1.
    """
    y_hat = np.array(y_hat)
    y = np.array(y)[None]
    assert len(y) == len(y_hat)
    assert np.all(np.isin(np.unique(y), [0, 1]))

    # Count the labels, not the leading axis: after ``y[None]`` the value of
    # ``len(y)`` is always 1, which would turn the mean into a plain sum.
    # ``max(..., 1)`` keeps an empty input from dividing by zero.
    m = max(y.size, 1)

    eps = 1e-15
    log_likelihood = (
        np.sum(np.log(1 - y_hat[y == 0] + eps))
        + np.sum(np.log(y_hat[y == 1] + eps))
    )
    return -log_likelihood / m


def Accuracy(y_hat, y, threshold=0.5):
    """Fraction of thresholded predictions that equal the 0/1 labels.

    A prediction counts as class 1 when it is greater than or equal to
    ``threshold``. The labels get a new leading axis before the comparison,
    and every label value must be 0 or 1 (checked with ``assert``).
    """
    predictions = np.array(y_hat)
    labels = np.array(y)[np.newaxis]
    assert len(labels) == len(predictions)
    assert np.all(np.isin(np.unique(labels), [0, 1]))
    hits = (predictions >= threshold) == labels
    return np.mean(hits)


def single_layer_foward_propogation(W_curr, b_curr, A_prev, activation='relu'):
    """Run one layer forward: affine transform followed by an activation.

    Args:
        W_curr: Weight matrix of the layer.
        b_curr: Bias added to ``W_curr @ A_prev``.
        A_prev: Activations coming from the previous layer (or the input).
        activation: ``'relu'`` or ``'sigmoid'``.

    Returns:
        Tuple ``(A_curr, Z_curr)``: the activated output and the
        pre-activation ``W_curr @ A_prev + b_curr``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    # Resolve the activation first so an unsupported name fails immediately.
    if activation == 'relu':
        activation_func = ReLU
    elif activation == 'sigmoid':
        activation_func = sigmoid
    else:
        # Raise rather than return the exception object: returning it would
        # only surface later as a confusing unpacking error in the caller.
        raise ValueError('Non-supported activation function')

    Z_curr = W_curr @ A_prev + b_curr
    A_curr = activation_func(Z_curr)
    return A_curr, Z_curr


def fully_forward_propogation(X, params_values, nn_architecture):
    """Push ``X`` through every layer and record the intermediate values.

    Layers are numbered from 1. For layer ``n`` the input it received is
    stored under ``'A_<n-1>'`` and its pre-activation under ``'Z_<n>'``.

    Args:
        X: Input fed to the first layer.
        params_values: Dict holding ``'W_<n>'`` and ``'b_<n>'`` per layer.
        nn_architecture: Iterable of layer dicts with an ``'activation'`` key.

    Returns:
        Tuple ``(output, memory)``: the last layer's activation (``X`` itself
        when there are no layers) and the dict of cached values.
    """
    memory = {}
    activations = X
    for layer_no, spec in enumerate(nn_architecture, start=1):
        layer_input = activations
        activations, pre_activation = single_layer_foward_propogation(
            params_values[f'W_{layer_no}'],
            params_values[f'b_{layer_no}'],
            layer_input,
            spec['activation'],
        )
        memory[f'A_{layer_no - 1}'] = layer_input
        memory[f'Z_{layer_no}'] = pre_activation
    return activations, memory


def single_layer_backward_propogation(dA_curr, A_prev, W_curr, Z_curr, activation = 'relu'):
    """Back-propagate through one layer.

    Args:
        dA_curr: Gradient of the loss with respect to this layer's output.
        A_prev: Input this layer received during the forward pass; its
            second dimension is used as the averaging count ``m``.
        W_curr: Weight matrix of the layer.
        Z_curr: Pre-activation values cached from the forward pass.
        activation: ``'relu'`` or ``'sigmoid'``.

    Returns:
        Tuple ``(dW_curr, db_curr, dA_prev)``: gradients for the weights, the
        bias, and the layer's input.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    if activation == 'relu':
        activation_func = ReLU_backward
    elif activation == 'sigmoid':
        activation_func = sigmoid_backward
    else:
        # Raise rather than return the exception object: returning it would
        # only surface later as a confusing unpacking error in the caller.
        raise ValueError('Non-supported activation function')

    m = A_prev.shape[1]
    dZ_curr = activation_func(dA_curr, Z_curr)
    # Weight and bias gradients are averaged over the m columns.
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)
    return dW_curr, db_curr, dA_prev

def fully_backward_propagation(y, y_hat, memory, params_values, nn_architecture):
    """Compute weight and bias gradients for every layer.

    Starts from the derivative of the binary cross-entropy with respect to
    ``y_hat`` (with ``1e-15`` added to each denominator) and walks the layers
    from last to first, feeding each layer's input gradient to the one
    before it.

    Args:
        y: Target labels.
        y_hat: Network output from the forward pass.
        memory: Cache with ``'A_<n-1>'`` and ``'Z_<n>'`` for each layer ``n``.
        params_values: Dict holding ``'W_<n>'`` per layer.
        nn_architecture: Layer dicts with an ``'activation'`` key.

    Returns:
        Dict mapping ``'dW_<n>'`` and ``'db_<n>'`` to the gradients.
    """
    labels = np.array(y)
    outputs = np.array(y_hat)
    eps = 1e-15

    positive_term = -labels / (outputs + eps)
    negative_term = (1 - labels) / (1 - outputs + eps)
    upstream = positive_term + negative_term

    grads_values = {}
    numbered_layers = list(enumerate(nn_architecture, start=1))
    for layer_no, spec in reversed(numbered_layers):
        activation = spec['activation']
        pre_activation = memory[f'Z_{layer_no}']
        layer_input = memory[f'A_{layer_no - 1}']
        weights = params_values[f'W_{layer_no}']
        d_weights, d_bias, upstream = single_layer_backward_propogation(
            upstream, layer_input, weights, pre_activation, activation
        )
        grads_values[f'dW_{layer_no}'] = d_weights
        grads_values[f'db_{layer_no}'] = d_bias
    return grads_values


def updates(params_values, grads_values, nn_architecture, learning_rate):
    """Apply one gradient-descent step to every layer's parameters.

    For each layer ``n`` (numbered from 1), ``learning_rate`` times the
    matching ``'dW_<n>'`` / ``'db_<n>'`` gradient is subtracted in place from
    ``'W_<n>'`` / ``'b_<n>'``. The same ``params_values`` dict is returned.
    """
    for layer_no, _ in enumerate(nn_architecture, start=1):
        for prefix in ('W', 'b'):
            key = f'{prefix}_{layer_no}'
            params_values[key] -= learning_rate * grads_values[f'd{key}']
    return params_values


def fit(X, Y, epochs, nn_architecture,learning_rate, seed):
    """Train the network with full-batch gradient descent.

    Each epoch runs a forward pass over ``X``, records the loss and accuracy
    against ``Y``, back-propagates, and updates the parameters.

    Args:
        X: Input passed to the forward pass.
        Y: Target labels passed to the loss, accuracy and backward pass.
        epochs: Number of training iterations.
        nn_architecture: Layer dicts describing the network.
        learning_rate: Step size for the parameter update.
        seed: Seed used to initialise the parameters.

    Returns:
        Tuple ``(params_values, loss_history)``: the trained parameters and
        the loss recorded at each epoch.
    """
    # Honour the caller's seed; it used to be ignored for a fixed 2018.
    params_values = init_layers(nn_architecture, seed=seed)
    loss_history = []
    # NOTE(review): accuracy is tracked but not returned, to keep the
    # existing two-value return that callers unpack -- confirm the intent.
    accuracy_history = []
    for _ in range(epochs):
        y_hat, cache = fully_forward_propogation(X, params_values, nn_architecture)
        loss_history.append(CrossEntropyLoss(y_hat, Y))
        accuracy_history.append(Accuracy(y_hat, Y))

        grads_values = fully_backward_propagation(Y, y_hat, cache, params_values, nn_architecture)
        params_values = updates(params_values, grads_values, nn_architecture, learning_rate)
    return params_values, loss_history