In [1]:
import numpy as np

In [None]:
import numpy

# Layer-by-layer description of the network.  Each entry gives one layer's
# input width, output width and the name of its activation function.
# The first layer's input width is read from x.shape[0].
# NOTE(review): `x` is not defined in any visible cell, so it must already
# exist in the kernel when this cell runs — confirm where it comes from.
nn_architecture = [
    dict(input_dim=x.shape[0], output_dim=10, activation="relu"),
    dict(input_dim=10, output_dim=1, activation="sigmoid"),
]


def init_layers(nn_architecture, seed = 2019):
    """Build the initial weight and bias arrays for every layer.

    Parameters
    ----------
    nn_architecture : iterable of dict
        One dict per layer; the 'output_dim' and 'input_dim' entries are read.
    seed : int, default 2019
        Value handed to ``np.random.seed`` before any weights are drawn.
        This reseeds NumPy's global random state.

    Returns
    -------
    dict
        ``'W_k'`` -> array of shape (output_dim, input_dim) drawn from a
        standard normal and scaled by 0.1, and ``'b_k'`` -> zeros of shape
        (output_dim, 1), where k counts layers starting from 1.
    """
    np.random.seed(seed)
    params_values = {}
    for layer_number, spec in enumerate(nn_architecture, start=1):
        n_out, n_in = spec['output_dim'], spec['input_dim']
        # Small random weights; biases start at zero.
        params_values[f'W_{layer_number}'] = 0.1 * np.random.randn(n_out, n_in)
        params_values[f'b_{layer_number}'] = np.zeros((n_out, 1))
    return params_values


def sigmoid(Z):
    """Logistic sigmoid, 1 / (1 + exp(-Z)), applied element-wise.

    The value is computed as ``exp(-log(1 + exp(-Z)))`` through
    ``np.logaddexp(0, -Z)``.  The direct formula evaluates ``np.exp(-Z)``,
    which overflows to ``inf`` (with a RuntimeWarning) for large negative Z;
    this form does not, while agreeing with the direct formula to floating
    point precision.

    Parameters
    ----------
    Z : scalar or numpy array
        Pre-activation values.

    Returns
    -------
    Same shape as ``Z``, with values in [0, 1].
    """
    A = np.exp(-np.logaddexp(0, -Z))
    return A

def ReLU(Z):
    """Rectified linear unit: element-wise maximum of 0 and ``Z``.

    Negative entries become 0; all other entries pass through unchanged.
    """
    return np.maximum(0, Z)


def sigmoid_backward(dA, Z):
    """Back-propagate a gradient through the sigmoid activation.

    Parameters
    ----------
    dA : numpy array
        Gradient of the loss with respect to the sigmoid output.
    Z : numpy array
        Pre-activation values that were fed to ``sigmoid``.

    Returns
    -------
    numpy array
        ``dA * s * (1 - s)`` with ``s = sigmoid(Z)``, i.e. the gradient with
        respect to ``Z``.
    """
    sig = sigmoid(Z)
    # sigmoid'(Z) = s * (1 - s).  The multiplication must be explicit:
    # writing `sig (1 - sig)` tries to call the array and raises TypeError.
    dZ = dA * sig * (1 - sig)
    return dZ

def ReLU_backward(dA, Z):
    """Back-propagate a gradient through the ReLU activation.

    Parameters
    ----------
    dA : array-like
        Gradient of the loss with respect to the ReLU output.
    Z : array-like
        Pre-activation values that were fed to ``ReLU``; same shape as ``dA``.

    Returns
    -------
    numpy array
        A copy of ``dA`` with entries zeroed wherever ``Z <= 0``.  ``dA``
        itself is left untouched.
    """
    dZ = np.array(dA, copy=True)
    # ReLU's derivative depends on the sign of the pre-activation Z, not on
    # the sign of the incoming gradient: 1 where Z > 0, otherwise 0.
    dZ[np.asarray(Z) <= 0] = 0
    return dZ

def CrossEntropyLoss(y_hat, y):
    """Mean binary cross-entropy between predictions and 0/1 labels.

    Parameters
    ----------
    y_hat : array-like
        Predicted probabilities.
    y : array-like
        Labels, every entry 0 or 1, with as many elements as ``y_hat``.
        Any layout is accepted (flat, row or column) because the labels are
        reshaped to ``y_hat``'s shape before use.

    Returns
    -------
    float
        ``-(1/m) * sum(y*log(y_hat) + (1-y)*log(1-y_hat))`` where ``m`` is
        the number of samples.  A constant 1e-15 is added inside each log so
        that predictions of exactly 0 or 1 do not produce ``log(0)``.

    Raises
    ------
    AssertionError
        If the inputs are empty, differ in size, or ``y`` holds a value
        other than 0 or 1.
    """
    y_hat = np.array(y_hat)
    y = np.array(y)
    assert y.size == y_hat.size, "y and y_hat must hold the same number of elements"
    assert y.size > 0, "at least one sample is required"
    assert np.all(np.isin(y, [0, 1])), "labels must be 0 or 1"
    # Align the labels with the predictions so the boolean masks below index
    # y_hat element-for-element regardless of how the caller laid out y.
    y = y.reshape(y_hat.shape)
    # m is the sample count.  Taking len() of a (1, m) array would give 1
    # and leave the loss un-averaged.
    m = y.size
    eps = 1e-15
    loss = np.sum(np.log(1 - y_hat[y == 0] + eps))
    loss += np.sum(np.log(y_hat[y == 1] + eps))
    return -1 * loss / m


def Accuracy(y_hat, y, threshold=0.5):
    """Fraction of samples whose thresholded prediction equals the label.

    Parameters
    ----------
    y_hat : array-like
        Predicted probabilities.
    y : array-like
        Labels, every entry 0 or 1, with as many elements as ``y_hat``.
        The labels are reshaped to ``y_hat``'s shape before comparison.
    threshold : float, default 0.5
        A prediction counts as class 1 when ``y_hat >= threshold``.

    Returns
    -------
    float
        Mean of the element-wise "predicted class == label" comparison.

    Raises
    ------
    AssertionError
        If the inputs are empty, differ in size, or ``y`` holds a value
        other than 0 or 1.
    """
    y_hat = np.array(y_hat)
    y = np.array(y)
    assert y.size == y_hat.size, "y and y_hat must hold the same number of elements"
    assert y.size > 0, "at least one sample is required"
    assert np.all(np.isin(y, [0, 1])), "labels must be 0 or 1"
    # Compare element-for-element.  Without this a column-vector y would
    # broadcast against a row-vector y_hat into an m-by-m comparison.
    y = y.reshape(y_hat.shape)
    class_hat = y_hat >= threshold
    return np.mean(class_hat == y)


def single_layer_forward_propogation(W_curr, A_prev, b_curr, activation='relu'):
    """Run one layer forward: affine transform followed by an activation.

    Parameters
    ----------
    W_curr : numpy array
        Weight matrix of the layer.
    A_prev : numpy array
        Activations coming from the previous layer (or the network input).
    b_curr : numpy array
        Bias of the layer, added to ``W_curr @ A_prev``.
    activation : {'relu', 'sigmoid'}, default 'relu'
        Name of the activation applied to the affine output.

    Returns
    -------
    (A_curr, Z_curr)
        The activated output and the pre-activation ``W_curr @ A_prev + b_curr``.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # Resolve the activation first so an unsupported name fails fast, and is
    # raised rather than handed back to the caller as a return value.
    if activation == 'relu':
        activation_func = ReLU
    elif activation == 'sigmoid':
        activation_func = sigmoid
    else:
        raise ValueError('Non-supported activation function')

    Z_curr = W_curr @ A_prev + b_curr
    A_curr = activation_func(Z_curr)
    return A_curr, Z_curr


def fully_forward_propagation(X, params_values, nn_architecture):
    """Push ``X`` through every layer and record the intermediates.

    Parameters
    ----------
    X : numpy array
        Network input; used as the first layer's incoming activation.
    params_values : dict
        Holds ``'W_k'`` and ``'b_k'`` for each layer k (counted from 1).
    nn_architecture : iterable of dict
        One dict per layer; only the 'activation' entry is read here.

    Returns
    -------
    (output, cache)
        ``output`` is the last layer's activation (``X`` itself when there
        are no layers).  ``cache`` maps ``'A_{k-1}'`` to the input of layer k
        and ``'Z_k'`` to its pre-activation.
    """
    cache = {}
    activations = X

    for prev_idx, spec in enumerate(nn_architecture):
        curr_idx = prev_idx + 1
        layer_input = activations
        act_name = spec['activation']
        weights = params_values[f'W_{curr_idx}']
        bias = params_values[f'b_{curr_idx}']

        activations, pre_activation = single_layer_forward_propogation(
            weights, layer_input, bias, act_name
        )

        # Keep what the backward pass needs: the layer's input and its Z.
        cache[f'A_{prev_idx}'] = layer_input
        cache[f'Z_{curr_idx}'] = pre_activation

    return activations, cache


def single_layer_backward_propagation(dA_curr, W_curr, A_prev, Z_curr,activation='relu'):
    """Back-propagate through one layer.

    Parameters
    ----------
    dA_curr : numpy array
        Gradient of the loss with respect to this layer's activation.
    W_curr : numpy array
        Weight matrix of this layer.
    A_prev : numpy array
        Activation that was fed into this layer; its second dimension is
        used as the sample count ``m``.
    Z_curr : numpy array
        Pre-activation of this layer from the forward pass.
    activation : {'relu', 'sigmoid'}, default 'relu'
        Name of the activation used by this layer in the forward pass.

    Returns
    -------
    (dW_curr, db_curr, dA_prev)
        Gradients with respect to the weights and bias (both divided by
        ``m``) and with respect to the previous layer's activation.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """
    # `elif` is required here: `else <condition>:` is not valid Python.
    if activation == 'relu':
        activation_func = ReLU_backward
    elif activation == 'sigmoid':
        activation_func = sigmoid_backward
    else:
        # Raise instead of returning the exception object, so callers cannot
        # mistake it for a (dW, db, dA) result.
        raise ValueError("Non-supported activation func")

    m = A_prev.shape[1]

    dZ_curr = activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    # keepdims keeps db as a column so it has the same shape as the bias.
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dW_curr, db_curr, dA_prev


def fully_backward_propagation(y, y_hat, memory,params_values, nn_architecture):
    """Back-propagate the binary cross-entropy loss through every layer.

    Parameters
    ----------
    y : array-like
        Labels, with as many elements as ``y_hat``; reshaped to
        ``y_hat``'s shape before use.
    y_hat : array-like
        Network output from the forward pass.
    memory : dict
        Forward-pass cache holding ``'A_{k-1}'`` and ``'Z_k'`` for each
        layer k (counted from 1).
    params_values : dict
        Holds the weight matrix ``'W_k'`` of each layer.
    nn_architecture : iterable of dict
        One dict per layer; only the 'activation' entry is read here.

    Returns
    -------
    dict
        ``'dW_k'`` and ``'db_k'`` for every layer k.

    Raises
    ------
    ValueError
        From the reshape, if ``y`` and ``y_hat`` differ in element count.
    """
    grads_values = {}
    y_hat = np.array(y_hat)
    # Give the labels the same layout as the predictions so the element-wise
    # expression below pairs each label with its own prediction.  Casting to
    # float keeps `-y` and `1 - y` valid for boolean labels as well.
    y = np.array(y, dtype=float).reshape(y_hat.shape)

    # Derivative of the cross-entropy with respect to y_hat; the 1e-15
    # guards against division by zero at y_hat == 0 or y_hat == 1.
    eps = 1e-15
    dA_prev = -y / (y_hat + eps) + (1 - y) / (1 - y_hat + eps)

    # Walk the layers from last to first, feeding each layer's dA_prev into
    # the layer before it.
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        dA_curr = dA_prev
        activation = layer['activation']
        W_curr = params_values[f'W_{layer_idx_curr}']
        A_prev = memory[f'A_{layer_idx_prev}']
        Z_curr = memory[f'Z_{layer_idx_curr}']
        dW_curr, db_curr, dA_prev = single_layer_backward_propagation(
            dA_curr, W_curr, A_prev, Z_curr, activation
        )
        grads_values[f'dW_{layer_idx_curr}'] = dW_curr
        grads_values[f'db_{layer_idx_curr}'] = db_curr
    return grads_values

def updates(params_values,grads_values,learning_rate,nn_architecture):
    """Apply one gradient-descent step to every layer's weights and biases.

    For each layer k (counted from 1) the entries ``'W_k'`` and ``'b_k'`` of
    ``params_values`` are decreased in place by ``learning_rate`` times the
    matching ``'dW_k'`` / ``'db_k'`` entry of ``grads_values``.

    Returns the same ``params_values`` dict that was passed in.
    """
    for layer_number, _ in enumerate(nn_architecture, start=1):
        for prefix in ('W', 'b'):
            key = f'{prefix}_{layer_number}'
            params_values[key] -= learning_rate * grads_values[f'd{key}']
    return params_values


def fit(X, Y, epochs, learning_rate,nn_architecture, seed=2019, return_history=False):
    """Train the network with full-batch gradient descent.

    Parameters
    ----------
    X : numpy array
        Training inputs, passed unchanged to the forward pass.
    Y : array-like
        Training labels, passed to the loss, accuracy and backward pass.
    epochs : int
        Number of forward/backward/update iterations.
    learning_rate : float
        Step size used by ``updates``.
    nn_architecture : list of dict
        Layer descriptions, as consumed by ``init_layers``.
    seed : int, default 2019
        Seed forwarded to ``init_layers`` for the initial weights.
    return_history : bool, default False
        When True, also return the per-epoch cost and accuracy lists.

    Returns
    -------
    dict, or (dict, list, list)
        The trained parameters; with ``return_history=True`` a tuple
        ``(params_values, cost_history, accuracy_history)``.
    """
    # The parameters come from init_layers; nn_architecture is a list and
    # cannot be called.
    params_values = init_layers(nn_architecture, seed=seed)
    cost_history = []
    accuracy_history = []

    for _ in range(epochs):
        y_hat, cache = fully_forward_propagation(X, params_values, nn_architecture)

        # Metrics are measured on the predictions made before this epoch's update.
        cost_history.append(CrossEntropyLoss(y_hat, Y))
        # The threshold is passed explicitly so the call is valid whether or
        # not Accuracy declares a default for it.
        accuracy_history.append(Accuracy(y_hat, Y, 0.5))

        grads_values = fully_backward_propagation(Y, y_hat, cache, params_values, nn_architecture)
        params_values = updates(params_values, grads_values, learning_rate, nn_architecture)

    if return_history:
        return params_values, cost_history, accuracy_history
    return params_values



In [14]:
# Scratch arithmetic: 40 minus 2.33 * 1.01.
# NOTE(review): reads like the lower end of a "centre - multiplier * spread"
# interval; what 40, 2.33 and 1.01 stand for is not recorded — confirm and name them.
40-2.33*1.01

37.6467

In [15]:
# Scratch arithmetic: 40 plus 2.33 * 1.01.
# NOTE(review): reads like the upper end of a "centre + multiplier * spread"
# interval; what 40, 2.33 and 1.01 stand for is not recorded — confirm and name them.
40+2.33*1.01

42.3533

In [16]:
# Scratch arithmetic: 0.107 divided by the square root of 20.
# NOTE(review): presumably a standard error (spread / sqrt(n) with n = 20) — confirm.
0.107/np.sqrt(20)

0.02392592735924775

In [17]:
# Scratch arithmetic: 0.13 minus 1.96 * 0.024.
# NOTE(review): 0.024 looks like 0.107/np.sqrt(20) rounded to three decimals,
# typed in by hand; presumably the lower end of an interval around 0.13 — confirm.
0.13-1.96*0.024

0.08296

In [18]:
# Scratch arithmetic: 0.13 plus 1.96 * 0.024.
# NOTE(review): 0.024 looks like 0.107/np.sqrt(20) rounded to three decimals,
# typed in by hand; presumably the upper end of an interval around 0.13 — confirm.
0.13+1.96*0.024

0.17704

In [21]:
# Scratch arithmetic: 0.64 divided by the square root of 20.
# NOTE(review): presumably a standard error (spread / sqrt(n) with n = 20) — confirm.
0.64/np.sqrt(20)

0.14310835055998652

In [22]:
# Scratch arithmetic: 0.73 divided by the square root of 20.
# NOTE(review): presumably a standard error (spread / sqrt(n) with n = 20) — confirm.
0.73/np.sqrt(20)

0.16323296235748463

In [23]:
# Scratch arithmetic: (8.94 - 7.5) divided by 0.14.
# NOTE(review): 0.14 looks like 0.64/np.sqrt(20) rounded to two decimals;
# presumably a standardised difference (test statistic) — confirm.
(8.94-7.5)/0.14

10.285714285714281

In [24]:
# Scratch arithmetic: (8.35 - 8.2) divided by 0.16.
# NOTE(review): 0.16 looks like 0.73/np.sqrt(20) rounded to two decimals;
# presumably a standardised difference (test statistic) — confirm.
(8.35-8.2)/0.16

0.9375000000000022

In [25]:
# Scratch arithmetic: (10.06 - 7.5) divided by 0.14.
# NOTE(review): 0.14 looks like 0.64/np.sqrt(20) rounded to two decimals;
# presumably a standardised difference (test statistic) — confirm.
(10.06-7.5)/0.14

18.28571428571429