https://iamtrask.github.io/2015/07/27/python-network-part2/

In [1]:
import numpy as np

In [2]:
x = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])

In [3]:
y = np.array([[0, 1, 1, 0]]).T

In [4]:
alpha, hidden_dim = (0.5, 50)

In [5]:
synapse_0 = 2 * np.random.random((3, hidden_dim)) - 1
synapse_1 = 2 * np.random.random((hidden_dim, 1)) - 1

In [6]:
for j in range(60000):
    layer_1 = 1/(1+np.exp(-(np.dot(x, synapse_0))))
    layer_2 = 1/(1+np.exp(-(np.dot(layer_1, synapse_1))))
    
    layer_2_delta = (layer_2 - y) * (layer_2 * (1 - layer_2))
    layer_1_delta = layer_2_delta.dot(synapse_1.T) * (layer_1 * 1-layer_1)
    
    synapse_1 -= (alpha * layer_1.T.dot(layer_2_delta))
    synapse_0 -= (alpha * x.T.dot(layer_1_delta))
print(layer_2)

[[ 0.07465855]
 [ 0.92468413]
 [ 0.92304228]
 [ 0.07770088]]


Changing the hidden dimension increases accuracy, to a point

### Layer Neural Networks

In [7]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

In [8]:
def sigmoid_output_to_derivative(output):
    return output*(1-output)

In [9]:
x = np.array([[0, 1],
              [0, 1],
              [1, 0],
              [1, 0]])

In [10]:
y = np.array([[0, 0, 1, 1]]).T

In [11]:
np.random.seed(1)

In [12]:
synapse_0 = 2*np.random.random((2, 1)) - 1

In [13]:
for iteration in range(10000):
    layer_0 = x
    layer_1 = sigmoid(np.dot(layer_0, synapse_0))
    
    layer_1_error = layer_1 - y
    
    layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
    synapse_0_derivative = np.dot(layer_0.T, layer_1_delta)
    
    synapse_0 -= synapse_0_derivative

print("Output After Training:\n", layer_1)

Output After Training:
 [[ 0.00505119]
 [ 0.00505119]
 [ 0.99494905]
 [ 0.99494905]]


### Adding in Alpha Parameter

In [14]:
alphas = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

In [15]:
x = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

In [16]:
y = np.array([[0, 1, 1, 0]]).T

In [17]:
for alpha in alphas:
    print("\nTraining with Alpha:" + str(alpha))
    np.random.seed(1)
    
    synapse_0 = 2 * np.random.random((3, 4)) - 1
    synapse_1 = 2 * np.random.random((4, 1)) - 1
    
    for iteration in range(60000):
        layer_0 = x
        layer_1 = sigmoid(np.dot(layer_0, synapse_0))
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))
        
        layer_2_error = layer_2 - y
        
        if iteration % 10000 == 0:
            print("Error after " +str(iteration) + " Iterations: " + str(np.mean(abs(layer_2_error))))
        
        layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)
        layer_1_error = layer_2_delta.dot(synapse_1.T)
        layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
        
        synapse_1 -= alpha * (layer_1.T.dot(layer_2_delta))
        synapse_0 -= alpha * (layer_0.T.dot(layer_1_delta))


Training with Alpha:0.001
Error after 0 Iterations: 0.496410031903
Error after 10000 Iterations: 0.495164025493
Error after 20000 Iterations: 0.493596043188
Error after 30000 Iterations: 0.491606358559
Error after 40000 Iterations: 0.489100166544
Error after 50000 Iterations: 0.485977857846

Training with Alpha:0.01
Error after 0 Iterations: 0.496410031903
Error after 10000 Iterations: 0.457431074442
Error after 20000 Iterations: 0.359097202563
Error after 30000 Iterations: 0.239358137159
Error after 40000 Iterations: 0.143070659013
Error after 50000 Iterations: 0.0985964298089

Training with Alpha:0.1
Error after 0 Iterations: 0.496410031903
Error after 10000 Iterations: 0.0428880170001
Error after 20000 Iterations: 0.0240989942285
Error after 30000 Iterations: 0.0181106521468
Error after 40000 Iterations: 0.0149876162722
Error after 50000 Iterations: 0.0130144905381

Training with Alpha:1
Error after 0 Iterations: 0.496410031903
Error after 10000 Iterations: 0.00858452565325
Error a

### Taking a closer look

In [18]:
x = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

In [19]:
y = np.array([[0, 1, 1, 0]]).T

In [20]:
for alpha in alphas:
    print("\nTraining With Alpha: " +str(alpha))
    np.random.seed(1)
    
    synapse_0 = 2 * np.random.random((3, 4)) - 1
    synapse_1 = 2 * np.random.random((4, 1)) - 1
    
    prev_synapse_0_weight_update = np.zeros_like(synapse_0)
    prev_synapse_1_weight_update = np.zeros_like(synapse_1)
    
    synapse_0_direction_count = np.zeros_like(synapse_0)
    synapse_1_direction_count = np.zeros_like(synapse_1)
    
    for iteration in range(60000):
        layer_0 = x
        layer_1 = sigmoid(np.dot(layer_0, synapse_0))
        layer_2 = sigmoid(np.dot(layer_1, synapse_1))
        
        layer_2_error = y - layer_2
        
        if iteration % 10000 == 0:
            print("Error: ", str(np.mean(abs(layer_2_error))))
        
        layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)
        layer_1_error = layer_2_delta.dot(synapse_1.T)
        layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
        
        synapse_1_weight_update = (layer_1.T.dot(layer_2_delta))
        synapse_0_weight_update = (layer_0.T.dot(layer_1_delta))
        
        if iteration > 0:
            synapse_0_direction_count += abs(((synapse_0_weight_update > 0) + 0) - ((prev_synapse_0_weight_update > 0) + 0))
            synapse_1_direction_count += abs(((synapse_1_weight_update > 0) + 0) - ((prev_synapse_1_weight_update > 0) + 0))
            
            synapse_1 += alpha * synapse_1_weight_update
            synapse_0 += alpha * synapse_0_weight_update
            
            prev_synapse_0_weight_update = synapse_0_weight_update
            prev_synapse_1_weight_update = synapse_1_weight_update
            
    print("\nSynapse 0\n", synapse_0)
    print("\nSynapse 0 Update Direction Changes\n", synapse_0_direction_count)
    print("\nSynapse 1\n", synapse_1)
    print("\nSynapse 1 Update Direction Changes\n", synapse_1_direction_count)


Training With Alpha: 0.001
Error:  0.496410031903
Error:  0.495164164289
Error:  0.49359621952
Error:  0.491606581738
Error:  0.489100446253
Error:  0.485978204375

Synapse 0
 [[-0.28448238  0.32471257 -1.53494865 -0.47594636]
 [-0.75506073 -1.04592547 -1.45444408 -0.32606706]
 [-0.25948114 -0.13486732 -0.29722799  0.40027967]]

Synapse 0 Update Direction Changes
 [[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 2.  0.  1.  1.]]

Synapse 1
 [[-0.61957565]
 [ 0.76414455]
 [-1.49795548]
 [ 0.40734334]]

Synapse 1 Update Direction Changes
 [[ 1.]
 [ 1.]
 [ 0.]
 [ 1.]]

Training With Alpha: 0.01
Error:  0.496410031903
Error:  0.45743930478
Error:  0.35910650918
Error:  0.239371438432
Error:  0.143077036967
Error:  0.0985994627601

Synapse 0
 [[ 2.39223796  2.56883735 -5.38288338 -3.29230775]
 [-0.35378185 -4.65092336 -5.67004639 -1.74285893]
 [-0.15431604 -1.17146816  1.97978616  3.44630778]]

Synapse 0 Update Direction Changes
 [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 5.  2.  1.  1.]]

Synapse 1

### Parameterizing the size of the hidden layer

In [21]:
hidden_size = [4, 8, 16, 32, 64, 128, 256]

In [22]:
x = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

In [23]:
y = np.array([[0, 1, 1, 0]]).T

In [24]:
for alpha in alphas:
    print("\nTraining with Alpha: " + str(alpha))
    for size in hidden_size:
        print("\nTraining with Size:" + str(size) + "\n")
        np.random.seed(1)
        
        synapse_0 = 2 * np.random.random((3, size)) - 1
        synapse_1 = 2 * np.random.random((size, 1)) - 1
        
        for iteration in range(60000):
            
            layer_0 = x
            layer_1 = sigmoid(np.dot(layer_0, synapse_0))
            layer_2 = sigmoid(np.dot(layer_1, synapse_1))
            
            layer_2_error = layer_2 - y
            
            if iteration % 10000 == 0:
                print("Error after " + str(iteration) + " Iterations: " + str(np.mean(abs(layer_2_error))))
                
            layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)
            layer_1_error = layer_2_delta.dot(synapse_1.T)
            layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
            
            synapse_1 -= alpha * (layer_1.T.dot(layer_2_delta))
            synapse_0 -= alpha * (layer_0.T.dot(layer_1_delta))

        print("\n", layer_2)


Training with Alpha: 0.001

Training with Size:4

Error after 0 Iterations: 0.496410031903
Error after 10000 Iterations: 0.495164025493
Error after 20000 Iterations: 0.493596043188
Error after 30000 Iterations: 0.491606358559
Error after 40000 Iterations: 0.489100166544
Error after 50000 Iterations: 0.485977857846

 [[ 0.42358939]
 [ 0.50110355]
 [ 0.54487022]
 [ 0.55091533]]

Training with Size:8

Error after 0 Iterations: 0.498858912827
Error after 10000 Iterations: 0.497610011723
Error after 20000 Iterations: 0.496033982942
Error after 30000 Iterations: 0.494296904767
Error after 40000 Iterations: 0.492306090243
Error after 50000 Iterations: 0.489961701295

 [[ 0.43098022]
 [ 0.50961693]
 [ 0.51959574]
 [ 0.54684834]]

Training with Size:16

Error after 0 Iterations: 0.496819794037
Error after 10000 Iterations: 0.494485357113
Error after 20000 Iterations: 0.491570163837
Error after 30000 Iterations: 0.487989082662
Error after 40000 Iterations: 0.483551192005
Error after 50000 Itera