In [74]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

In [75]:
# create random dataset
X = np.vstack([(np.random.rand(10,2)*5), (np.random.rand(10,2)*10)])
Y = np.hstack([[0]*10, [1]*10])

dataset = pd.DataFrame(X, columns={"X1", "X2"})
dataset["Y"] = Y

In [76]:
dataset

Unnamed: 0,X2,X1,Y
0,3.701654,0.91903,0
1,2.519024,0.316749,0
2,2.056578,2.616058,0
3,3.726043,4.258152,0
4,4.008223,3.290527,0
5,4.233931,0.835891,0
6,1.941797,1.967162,0
7,4.589698,2.813818,0
8,1.591513,3.961753,0
9,1.13617,0.461701,0


In [77]:
W1 = np.random.rand(3,2)
B1 = np.random.rand(3)

W2 = np.random.rand(3,2)
B2 = np.random.rand(2)

In [78]:
def sigmoid(x):
    return 1/1+np.exp(-x)

def derivative_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [79]:
Z = np.zeros((20, 2))
for i in range(20):
    Z[i, Y[i]] = 1
Z

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [0., 1.]])

In [80]:
X.shape

(20, 2)

In [81]:
def forward_propagation(X, W1, B1, W2, B2):
    # first layer
    z = X.dot(W1.T) + B1
    M = sigmoid(z)
    
    # second layer
    A = M.dot(W2) + B2
    expA = np.exp(A)
    Y = expA/expA.sum(axis=1, keepdims=True)
    return Y, M
    

In [82]:
forward_propagation(X, W1, B1, W2, B2)

(array([[0.35482495, 0.64517505],
        [0.35842227, 0.64157773],
        [0.36864911, 0.63135089],
        [0.36040905, 0.63959095],
        [0.35985626, 0.64014374],
        [0.35265635, 0.64734365],
        [0.37081775, 0.62918225],
        [0.35864516, 0.64135484],
        [0.36927968, 0.63072032],
        [0.38955691, 0.61044309],
        [0.35893179, 0.64106821],
        [0.35603343, 0.64396657],
        [0.35885802, 0.64114198],
        [0.37424558, 0.62575442],
        [0.35884193, 0.64115807],
        [0.36090281, 0.63909719],
        [0.36844816, 0.63155184],
        [0.35821757, 0.64178243],
        [0.34805621, 0.65194379],
        [0.36335043, 0.63664957]]),
 array([[1.05354399, 1.02792704, 1.2912457 ],
        [1.14965883, 1.07781088, 1.53988421],
        [1.08619601, 1.03355252, 1.09543477],
        [1.01499512, 1.00535833, 1.02155115],
        [1.01782466, 1.00723275, 1.0442002 ],
        [1.03851897, 1.02094586, 1.29069188],
        [1.11892824, 1.04949299, 1.1604867

In [85]:
def diff_W1(H, Z, Y):
    return H.T.dot(Z-Y)

def diff_B1(Z, Y):
    return (Z-Y).sum(axis=0)

def diff_W2(H, output, Z, X, W2):
    dz = (Z-output).dot(W2.T) * H * (1-H)
    return X.T.dot(dz)

def diff_B2(H, Y, Z, W2):
    # ((Z-Y).dot(Wi_2.T) * H * (1-H)).sum(axis=0)
    return ((Z-Y).dot(W2) * H * (1-H)).sum(axis=0)

In [86]:
# backward propagation
learning_rate = 0.001
for epoch in range(5000):
    print(epoch)
    output, hidden = forward_propagation(X, W1, B1, W2, B2)
    print("mango1")
    W2 = diff_W2(hidden, output, Z, X, W2) * learning_rate
    print("mango2")
    B2 = B2 + diff_B2(hidden, output, Z, W2) * learning_rate
    
    W1 = W1 + diff_W1(hidden, Z, output) * learning_rate
    B2 = B2 + diff_B1(Z, output) * learning_rate
    

0


ValueError: shapes (20,3) and (2,3) not aligned: 3 (dim 1) != 2 (dim 0)

In [32]:
output, hidden = forward_propagation(X, W1, B1, W2, B2)
diff_W1(hidden, Z, output)

array([[ 4.72440892, -4.72440892],
       [ 4.30463519, -4.30463519],
       [ 3.64916222, -3.64916222]])

In [45]:
W2 = W2 + diff_W1(hidden, Z, output) * learning_rate

In [46]:
W2

array([[0.96720521, 0.0686507 ],
       [0.24060262, 0.63477206],
       [0.00696559, 0.54786256]])

In [87]:
nn_architecture = [
    {"input_dim": 2, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 6, "activation": "relu"},
    {"input_dim": 6, "output_dim": 4, "activation": "relu"},
    {"input_dim": 4, "output_dim": 1, "activation": "sigmoid"},
]

In [88]:
def init_layers(nn_architecture, seed = 99):
    np.random.seed(seed)
    number_of_layers = len(nn_architecture)
    params_values = {}

    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]
        
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1
        
    return params_values

In [89]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))

def relu(Z):
    return np.maximum(0,Z)

def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy = True)
    dZ[Z <= 0] = 0;
    return dZ;

In [90]:
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation="relu"):
    Z_curr = np.dot(W_curr, A_prev) + b_curr
    
    if activation is "relu":
        activation_func = relu
    elif activation is "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation function')
        
    return activation_func(Z_curr), Z_curr

In [91]:
def full_forward_propagation(X, params_values, nn_architecture):
    memory = {}
    A_curr = X
    
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        A_prev = A_curr
        
        activ_function_curr = layer["activation"]
        W_curr = params_values["W" + str(layer_idx)]
        b_curr = params_values["b" + str(layer_idx)]
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activ_function_curr)
        
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr
       
    return A_curr, memory

In [92]:
def get_cost_value(Y_hat, Y):
    m = Y_hat.shape[1]
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(cost)

def get_accuracy_value(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return (Y_hat_ == Y).all(axis=0).mean()

In [93]:
def single_layer_backward_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation="relu"):
    m = A_prev.shape[1]
    
    if activation is "relu":
        backward_activation_func = relu_backward
    elif activation is "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise Exception('Non-supported activation function')
    
    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)

    return dA_prev, dW_curr, db_curr

In [94]:
def full_backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    grads_values = {}
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
   
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat));
    
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]
        
        dA_curr = dA_prev
        
        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        b_curr = params_values["b" + str(layer_idx_curr)]
        
        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activ_function_curr)
        
        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr
    
    return grads_values

In [95]:
def update(params_values, grads_values, nn_architecture, learning_rate):
    for layer_idx, layer in enumerate(nn_architecture):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]        
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values;

In [96]:
def train(X, Y, nn_architecture, epochs, learning_rate):
    params_values = init_layers(nn_architecture, 2)
    cost_history = []
    accuracy_history = []
    
    for i in range(epochs):
        Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        grads_values = full_backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)
        
    return params_values, cost_history, accuracy_history