In [8]:
import numpy as np

# sigmoid for binary problem
def nonlin(x):
    return 1/(1+np.exp(-x))

# derivative of sigmoid
def drv_nonlin(x):
    return x*(1-x)

# input dataset
X = np.array([ [0,1,1],
               [0,0,1],
               [1,1,1],
               [1,0,1] ])

# output dataset
# y = np.array([ [0],
#                [1],
#                [1],
#                [0] ])
# or simplify
y = np.array([[0,1,1,0]]).T

alphas = [0.1, 10, 1000]

for alpha in alphas:
    print("\nTraining With Alpha:" + str(alpha))

    # seed random numbers to make calculation
    # this is neccessary to observe the result under same random numbers in one session
    np.random.seed(1)

    # hidden layer configuration.
    # 3 layear: You need to configure synapse 0 and synapse 1 here.
    synapse_0 = 2* np.random.random((3, 4)) - 1 # 3x4
    synapse_1 = 2* np.random.random((4, 1)) - 1 # 4x1

    # start training iterations (10k times)
    for i in range(10001):
        # iteration start
        num_iter = i

        # ---- training iteration step 1: Forward Propagation
        # Forward Propagation applies weights of synapses in each layer to produce output layer
        # Output layer is the layer that is the last layer as a final output for an iteration of training
        # the initial layer is the input
        l0 = X # 4x3
        # the hidden layer after application of the first synapse weights to layer 0 values, which is the input
        l0_weighted = np.dot(l0,synapse_0) # 4x3 3x4 -> 4x4
        # smooth each weighted value into new value ranging [0, 1)
        # now we generated the hidden layer, l1
        l1 = nonlin(l0_weighted) # -> 4x4

        # we finally compute the output layer by applying the second synapse weights to layer 1 values
        l1_weighted = np.dot(l1,synapse_1) # 4x4 4x1 -> 4x1
        # smooth each weighted value into new value ranging [0, 1)
        # now we generated the hidden layer, l1
        l2 = nonlin(l1_weighted) # -> 4x1
        # ---- Forward Propagation completed


        # ---- training iteration step 2: Back Propagation
        # Back Propagation adjust each weight of synapse layer by layer

        # First we figure out the initial error amount and direction on the output layer relative to the given output y
        l2_error = y - l2# 4x1 - 4x1 -> 4x1
        # Incorporate the derivative of sigmoid on layer 2 (output layer)
        l2_delta = l2_error * drv_nonlin(l2) # 4x1 4x1 -> 4x1
        # Apply the delta computed to each synapses for the next iteration of training
        synapse_1 += alpha*l1.T.dot(l2_delta) # 4x4.T(=4x4) 4x1 -> 4x1

        # How much weights of synapse 1 contributed to the l2_delta
        l1_error = l2_delta.dot(synapse_1.T) # 4x1 4x1.T(=1x4) -> 4x4 
        # Incorporate the derivative of sigmoid on layer 1 (hidden layer)
        l1_delta = l1_error * drv_nonlin(l1) # 4x4 4x4 -> 4x4    
        # Apply the delta computed to each synapses for the next iteration of training
        synapse_0 += alpha*l0.T.dot(l1_delta) # 4x3.T(=2x4) 4x4 -> 3x4 
        # ---- Back Propagation completed

        # Checking error of the output layer respects to given output for each 10000 iterations
        # The number should decrease over the iterations
        if (num_iter% 1000) == 0:
            print("Error at " + str(num_iter) + " times: " + str(np.mean(np.abs(l2_error))))


    print('')
    print("Output After Training:")
    print(l2)


Training With Alpha:1
Error at 0 times: 0.503589968097
Error at 1000 times: 0.0491140003698
Error at 2000 times: 0.0285933488243
Error at 3000 times: 0.0217416995946
Error at 4000 times: 0.0180579295346
Error at 5000 times: 0.0156816931897
Error at 6000 times: 0.0139914002936
Error at 7000 times: 0.012713147004
Error at 8000 times: 0.0117050351321
Error at 9000 times: 0.0108852190585
Error at 10000 times: 0.0102027281294

Output After Training:
[[ 0.01187053]
 [ 0.99163483]
 [ 0.99035002]
 [ 0.01092523]]

Training With Alpha:10
Error at 0 times: 0.503589968097
Error at 1000 times: 0.252422788538
Error at 2000 times: 0.251274160635
Error at 3000 times: 0.251733180831
Error at 4000 times: 0.0132009136641
Error at 5000 times: 0.00839625941261
Error at 6000 times: 0.00671413130865
Error at 7000 times: 0.00576458226324
Error at 8000 times: 0.00513254947463
Error at 9000 times: 0.00467243365523
Error at 10000 times: 0.00431791192321

Output After Training:
[[ 0.00478068]
 [ 0.99873911]
 [ 0