In [None]:
# https://towardsdatascience.com/lets-code-a-neural-network-in-plain-numpy-ae7e74410795

In [3]:
import numpy as np

In [26]:
nn_architecture = [
    {"input_dim" : 2, "output_dim" : 4, "activation" : "relu"},
    {"input_dim" : 4, "output_dim" : 6, "activation" : "relu"},
    {"input_dim" : 6, "output_dim" : 6, "activation" : "relu"},
    {"input_dim" : 6, "output_dim" : 4, "activation" : "relu"},
    {"input_dim" : 4, "output_dim" : 1, "activation" : "sigmoid"},
]

In [27]:

# Read the architecture :

def init_layers(nn_architecture, seed = 99):
    np.random.seed(seed)
    number_of_layers = len(nn_architecture)
    params_values = {}
    
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]
        
        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) * 0.1
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) * 0.1
    
    return params_values

In [28]:

# Activation Functions :

def sigmoid(Z):
    return 1/(1+cp.exp(-Z))

def relu(Z):
    return np.maximum(0,Z)

def sigmoid_backward(dA,Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def relu_backwards(dA, Z):
    dZ = np.array(dA, copy = True)
    dZ[Z <= 0] = 0
    return dZ

In [29]:

# Forward Propagation :

# Single Layer
def single_layer_forward_propagation(A_prev, W_curr, b_curr, activation = "relu"):
    Z_curr = (W_curr @ A_prev) + b_curr
    
    if activation == "relu":
        activation_func = relu
    elif activation == "sigmoid":
        activation_func = sigmoid
    else:
        raise Exception('Non-supported activation function')
    
    return activation_fund(Z_curr), Z_curr



# Full
def full_forward_propagation(X, params_values, nn_architecture):
    memory = {}
    A_curr = X
    
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx+1
        A_prev = A_curr
        
        activation_function_curr = layer["activation"]
        W_curr = params_values["W" + str(layer_idx)]
        b_curr = params_values["b" + str(layer_idx)]
        A_curr, Z_curr = single_layer_forward_propagation(A_prev, W_curr, b_curr, activation_function_curr)
        
        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr 
    return A_curr, memory

In [30]:

# Loss Function :

def get_cost_value(Y_hat, Y):
    m = Y-hat.spahe[1]
    cost = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1-Y, np.log(1-Y_hat).T))
    return np.squeeze(cost)

def get_accuracy_value(Y_hat, Y):
    Y_hat_ = convert_prob_into_class(Y_hat)
    return(Y_hat_ == Y).all(axis=0).mean()

In [31]:

# Back Propagation :

# Single Layer
def single_layer_back_propagation(dA_curr, W_curr, b_curr, Z_curr, A_prev, activation ="relu"):
    m = A_prev.shape[1]
    
    if activation == "relu":
        backward_activation_func = relu_backward
    elif activation == "sigmoid":
        backward_activation_func = sigmoid_backward
    else:
        raise Excepition('Non-supported activation function.')
        
    dZ_curr = backward_activation_func(dA_curr, Z_curr)
    dW_curr = np.dot(dZ_curr, A_prev.T) / m
    db_curr = np.sum(dZ_curr, axis=1, keepdims=True) / m
    dA_prev = np.dot(W_curr.T, dZ_curr)
    
    return dA_prev, dW_curr, db_curr



# Full
def full_backward_propagation(Y_hat, Y, memory, params_value, nn_architecture):
    grads_values = {}
    m = Y.shape[1]
    Y = Y.reshape(Y_hat.shape)
    
    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1-Y, 1-Y_hat))
    
    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]
        
        dA_curr = dA_prev
        
        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_index_curr)]
        W_curr = params_values["W" + str(layer_index_curr)]
        b_curr = params_values["b" + str(layer_index_curr)]
        
        dA_prev, dW_curr, db_curr = single_layer_backward_propagation(
            dA_curr, W_curr, b_curr, Z_curr, A_prev, activation_function_curr)
        
        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr
        
    return grads_values

In [32]:

# Updating Parameter Values :

def update(param_values, grads_values, nn_architecture, learning_rate):
    for layer_idx, layer in enumerate(nn_architecture):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]
        
    return params_values

In [34]:

# Putting it together :

def train(X, Y, nn_architecture, epochs, learning_rate):
    params_values = init_layers(nn_architecture, 2)
    cost_history = []
    accuracy_history = []
    
    for i in range(epochs):
        Y_hat, cashe = full_forward_propagation(X, params_values, nn_architecture)
        cost = get_cost_value(Y_hat, Y)
        cost_history.append(cost)
        accuracy - get_accuracy_value(Y_hat, Y)
        accuracy_history.append(accuracy)
        
        grads_values = full_backward_propagation(Y_hat, Y, cache, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)
        
    return params_values, cost_history, accuracy_history

In [38]:
# https://iamtrask.github.io/2015/07/12/basic-python-network/

In [2]:
import numpy as np

In [3]:
# sigmoid function
def nonlin(x, deriv = False):
    if(deriv == True):
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

In [4]:
# input dataset
X = np.array([ [0,0,1],
              [0,1,1],
              [1,0,1],
              [1,1,1] ])

In [5]:
# output dataset
y = np.array([[0,0,1,1]]).T

In [6]:
# seed random numbers to make calculation
# deterministic (just a good practice)
np.random.seed(1)

In [7]:
# initialize weights randomly with mean 0
syn0 = 2*np.random.random((3,1)) - 1

In [8]:
for iter in range(10000):
    
    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(10, syn0))
    
    # how much did we miss?
    l1_error = y - l1
    
    # multiply how much we missed by the 
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * nonlin(l1, True)
    
    # update weights
    syn0 += np.dot(l0.T, l1_delta)

ValueError: operands could not be broadcast together with shapes (4,1) (3,1) 

In [9]:
print ("Output After Training: ")
print (l1)

Output After Training: 
[[1.59821083e-01]
 [9.87949077e-01]
 [4.55018303e-05]]


In [3]:
import numpy as np

# sigmoid function
def nonlin(x,deriv=False):
    if(deriv==True):
        return x*(1-x)
    return 1/(1+np.exp(-x))
    
# input dataset
X = np.array([  [0,0,1],
                [0,1,1],
                [1,0,1],
                [1,1,1] ])
    
# output dataset            
y = np.array([[0,0,1,1]]).T

# seed random numbers to make calculation
# deterministic (just a good practice)
np.random.seed(1)

# initialize weights randomly with mean 0
syn0 = 2*np.random.random((3,1)) - 1

for iter in range(10000):

    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(l0,syn0))

    # how much did we miss?
    l1_error = y - l1

    # multiply how much we missed by the 
    # slope of the sigmoid at the values in l1
    l1_delta = l1_error * nonlin(l1,True)

    # update weights
    syn0 += np.dot(l0.T,l1_delta)

print ("Output After Training:")
print (l1)
print("syn0 :")
print(syn0)


Output After Training:
[[0.00966449]
 [0.00786506]
 [0.99358898]
 [0.99211957]]
syn0 :
[[ 9.67299303]
 [-0.2078435 ]
 [-4.62963669]]


In [35]:
# 3 layer NN

In [22]:
def nonlin(x, deriv = False):
    if(deriv == True):
        return x *(1-x)
    return 1/(1+np.exp(-x))

In [23]:
X = np.array([[0,0,1],
             [0,1,1],
             [1,0,1],
             [1,1,1]])

In [24]:
y = np.array([[0],
            [1],
            [1],
            [0]])

In [25]:
np.random.seed(1)

In [26]:
# randomly initilize weights with mean 0
syn0 = 2*np.random.random((3,4)) - 1
syn1 = 2*np.random.random((4,1)) - 1

In [48]:
for j in range(600000):
    
    #Feed forward through layers 0, 1, 2
    l0 = X
    l1 = nonlin(np.dot(l0, syn0))
    l2 = nonlin(np.dot(l1, syn1))
    
    # How much did we miss by?
    l2_error = y - l2
    
    if (j % 100000) == 0:
        print("Error:" +
             str(np.mean(np.abs(l2_error))))
    
    # In what direction is the target value?
    l2_delta = l2_error*nonlin(l2, True)
    
    # How much did each l1 value contrubute to the l2 error?
    l1_error = l2_delta.dot(syn1.T)
    
    # In what direction is the target l1?
    l1_delta = l1_error * nonlin(l1, True)
    
    syn1 += l1.T.dot(l2_delta)
    syn0 += l0.T.dot(l1_delta)

Error:0.0006540033605276702
Error:0.0006289970464479002
Error:0.0006066349182809881
Error:0.0005864826471159187
Error:0.0005681995279676114
Error:0.0005515140512744458


In [44]:
print(l2)

[[5.47016778e-04]
 [9.99349433e-01]
 [9.99365936e-01]
 [7.84367223e-04]]


In [116]:
print(nonlin(l2, True))

[[0.00259893]
 [0.00326717]
 [0.00297399]
 [0.00385263]]


In [46]:
print(syn1)

[[ -8.47655452]
 [  8.38351592]
 [-11.35357567]
 [  9.38068262]]


## 

In [47]:
print(np.exp(2))

7.38905609893065
