Here are three examples that demonstrate both the forward and backward propagation steps of a simple neural network. The first one uses rank 1 arrays as input, which is what you get when you use zip(x, y) to iterate through the samples one at a time. The problem with rank 1 arrays is that the dimensions of each operation become hard to track during the calculation, which can lead to subtle bugs that go unnoticed. This is the reason Andrew Ng specifically advises against using rank 1 arrays when building models.

Hence the second example improves on the first by changing every rank 1 array into a rank 2 array.

The third example uses vectorized training over multiple samples at once, which is how training is actually done in practice.

In [2]:
import numpy as np

def sigmoid(x):
    """Logistic sigmoid activation, applied element-wise via NumPy.

    Maps ``x`` (a scalar or NumPy array) to ``1 / (1 + exp(-x))``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# One training sample: a rank 1 feature vector and a scalar target.
x = np.array([0.5, 0.1, -0.2])  # shape (3,)
y = 0.6

learnrate = 0.5

# Input -> hidden weights, shape (3, 2).
weights_input_hidden = np.array([
    [0.5, -0.6],
    [0.1, -0.2],
    [0.1, 0.7],
])

# Hidden -> output weights, also rank 1: shape (2,).
weights_hidden_output = np.array([0.1, -0.3])

# ---- Forward pass ----
hidden_layer_input = x.dot(weights_input_hidden)
hidden_layer_output = sigmoid(hidden_layer_input)  # shape (2,)

output_layer_input = hidden_layer_output.dot(weights_hidden_output)
output = sigmoid(output_layer_input)  # a single number

# ---- Backward pass ----

# Output error: target minus prediction (a rank 0 value).
error = y - output

# Output-layer error term: error scaled by the sigmoid derivative
# output * (1 - output).
output_error_term = error * output * (1 - output)

# Hidden-layer error term: the output error term weighted by each hidden
# unit's outgoing weight, times that unit's sigmoid derivative; shape (2,).
# It is computed before weights_hidden_output is updated below, so it uses
# the pre-update weights.
hidden_error_term = (
    output_error_term * weights_hidden_output
    * hidden_layer_output * (1 - hidden_layer_output)
)

# Update the hidden -> output weights in place; the step has shape (2,).
weights_hidden_output += learnrate * output_error_term * hidden_layer_output

# Update the input -> hidden weights in place. x is turned into a (3, 1)
# column so that broadcasting against the (2,) error term gives a (3, 2) step.
weights_input_hidden += learnrate * hidden_error_term * x[:, np.newaxis]

# Updated hidden -> output weights
print(weights_hidden_output)
# Updated input -> hidden weights
print(weights_input_hidden)

[ 0.10804047 -0.29444082]
[[ 0.50017701 -0.60051118]
 [ 0.1000354  -0.20010224]
 [ 0.0999292   0.70020447]]


In [3]:
# single sample with no Rank 1 objects

import numpy as np

def sigmoid(x):
    """Sigmoid activation function: ``1 / (1 + exp(-x))``.

    ``x`` may be a scalar or a NumPy array; arrays are handled element-wise.
    """
    exp_neg_x = np.exp(-x)
    return 1 / (1 + exp_neg_x)

# Same single sample as a rank 2 setup: every array is an explicit matrix.
x = np.array([[0.5, 0.1, -0.2]])  # shape (1, 3): one row per sample
y = np.array([[0.6]])  # shape (1, 1)

learnrate = 0.5

# Input -> hidden weights, shape (3, 2).
weights_input_hidden = np.array([
    [0.5, -0.6],
    [0.1, -0.2],
    [0.1, 0.7],
])

# Hidden -> output weights as a (2, 1) column instead of a rank 1 array.
weights_hidden_output = np.array([[0.1], [-0.3]])

# ---- Forward pass ----
hidden_layer_input = x.dot(weights_input_hidden)  # shape (1, 2)
hidden_layer_output = sigmoid(hidden_layer_input)  # shape (1, 2)

output_layer_input = hidden_layer_output.dot(weights_hidden_output)
output = sigmoid(output_layer_input)  # shape (1, 1)

# ---- Backward pass ----

# Output error: target minus prediction, shape (1, 1).
error = y - output

# Output-layer error term: error times the sigmoid derivative, shape (1, 1).
output_error_term = error * output * (1 - output)

# Hidden-layer error term: send the output error term back through the
# transposed hidden -> output weights, then scale by the hidden sigmoid
# derivative; shape (1, 2). Computed before the weights are updated below.
hidden_error_term = (
    output_error_term.dot(weights_hidden_output.T)
    * hidden_layer_output * (1 - hidden_layer_output)
)

# Update the hidden -> output weights in place: (2, 1) x (1, 1) -> (2, 1).
# For this single sample, the element-wise form
# learnrate * output_error_term * hidden_layer_output.T has the same shape.
weights_hidden_output += learnrate * hidden_layer_output.T.dot(output_error_term)

# Update the input -> hidden weights in place: (3, 1) x (1, 2) -> (3, 2).
weights_input_hidden += learnrate * x.T.dot(hidden_error_term)

# Updated hidden -> output weights
print(weights_hidden_output)
# Updated input -> hidden weights
print(weights_input_hidden)

[[ 0.10804047]
 [-0.29444082]]
[[ 0.50017701 -0.60051118]
 [ 0.1000354  -0.20010224]
 [ 0.0999292   0.70020447]]


In [4]:
# Training with multiple samples: this is the most practical approach in real applications,
# where both the inputs and the outputs are vectorized as matrices.
import numpy as np

# Step-size factor: every weight update in the training loop below is
# multiplied by alpha.
alpha = 10

def sigmoid(x):
    """Sigmoid activation: return ``1 / (1 + exp(-x))``.

    Works on scalars and, element-wise, on NumPy arrays.
    """
    one_plus_exp = 1 + np.exp(-x)
    return 1 / one_plus_exp

# Two-layer network (one hidden layer): 3 input features, 4 hidden units
# and 1 output unit. Each row of X is one training sample.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

# One target per sample, stored as a (4, 1) column.
y = np.array([[0], [1], [1], [0]])

n_features = X.shape[1]

# Seed NumPy's random generator so the initial weights are the same on every
# run, which makes debugging easier.
np.random.seed(32)

# Zero-mean normal initial weights with standard deviation 1 / sqrt(n_features).
init_scale = 1 / n_features**0.5
weights_input_hidden = np.random.normal(scale=init_scale, size=(3, 4))
weights_hidden_output = np.random.normal(scale=init_scale, size=(4, 1))

print("Initial weights: ")
print(weights_input_hidden)
print(weights_hidden_output)

# Train for 100 epochs; each epoch uses all samples at once.
for j in range(100):

    # Forward pass through the hidden layer and the output layer.
    hidden_layer_output = sigmoid(X.dot(weights_input_hidden))  # shape (4, 4)
    final_output = sigmoid(hidden_layer_output.dot(weights_hidden_output))  # shape (4, 1)

    # The error is defined as target minus prediction; this sign convention
    # is why the weight differences are added (not subtracted) below.
    final_output_error = y - final_output  # shape (4, 1)

    # Output error term: the output error times the derivative of the output
    # layer's activation function.
    final_output_error_term = (
        final_output_error * final_output * (1 - final_output)
    )  # shape (4, 1)

    # How much each hidden unit contributed to the output error, according to
    # the hidden -> output weights: (4, 1) x (1, 4) -> (4, 4).
    hidden_layer_output_error = np.dot(final_output_error_term, weights_hidden_output.T)
    hidden_layer_output_error_term = (
        hidden_layer_output_error * hidden_layer_output * (1 - hidden_layer_output)
    )  # shape (4, 4)

    # Weight differences for this epoch, summed over all samples by the
    # matrix products.
    weights_hidden_output_difference = hidden_layer_output.T.dot(
        final_output_error_term
    )  # (4, 4) x (4, 1) -> (4, 1)
    weights_input_hidden_difference = X.T.dot(
        hidden_layer_output_error_term
    )  # (3, 4) x (4, 4) -> (3, 4)

    # Apply the scaled differences in place.
    # NOTE(review): the divisor is n_features (X.shape[1], i.e. 3), not the
    # number of samples (X.shape[0], i.e. 4). If a per-sample average was
    # intended, confirm which divisor is wanted.
    weights_hidden_output += alpha * weights_hidden_output_difference / n_features
    weights_input_hidden += alpha * weights_input_hidden_difference / n_features

    # Report progress every 20 epochs, skipping epoch 0.
    if j % 20 == 0 and j > 0:
        print("Error after %s iterations: %s" % (j, np.mean(final_output_error**2)))
        print("Weights update after every 20 epoch: ")
        print(weights_input_hidden)
        print(weights_hidden_output)

Initial weights: 
[[-0.20143431  0.56794144  0.33539595  0.04057874]
 [ 0.4489087   0.33599404  0.84973866  0.960238  ]
 [-0.15079068 -0.39760774 -0.40121413  1.12030401]]
[[ 1.04235695]
 [ 0.26345293]
 [-0.33186789]
 [ 0.06592214]]
Error after 20 iterations: 0.249671646691
Weights update after every 20 epoch: 
[[-0.31186521  0.57647642  0.21399002 -0.13309149]
 [ 0.42749989  0.33908914  0.7814787   0.96151458]
 [-0.3479972  -0.40109341 -0.480883    1.11669635]]
[[ 0.87226058]
 [ 0.13100723]
 [-0.46314094]
 [-0.25683172]]
Error after 40 iterations: 0.248555608688
Weights update after every 20 epoch: 
[[-0.45821546  0.61137029  0.1644987  -0.37881868]
 [ 0.49678079  0.36928896  0.74287451  1.00909503]
 [-0.47135973 -0.36300227 -0.55002109  1.10811792]]
[[ 0.94413674]
 [ 0.22826295]
 [-0.41109329]
 [-0.38283076]]
Error after 60 iterations: 0.243826210867
Weights update after every 20 epoch: 
[[-0.7921686   0.70025013  0.14881824 -0.80393464]
 [ 0.69027893  0.45415457  0.72157744  1.20239