In [17]:
#This notebook implements a basic neural network using only
#numpy. The data comes from the book "Grokking Deep Learning".

In [18]:
import numpy as np

In [19]:
# Pin NumPy's global random state so the random weights drawn later in the
# notebook are the same on every fresh run.
np.random.seed(seed=1)

In [20]:
def relu(x):
    """Rectified linear unit: element-wise ``max(x, 0)``.

    Args:
        x: A number or array-like of numbers.

    Returns:
        ``x`` with every negative entry replaced by zero, as a NumPy scalar
        or array.
    """
    # np.maximum clamps directly. The mask-and-multiply form `(x > 0) * x`
    # produces NaN for -inf (0 * -inf) and -0.0 for negative floats.
    return np.maximum(x, 0)

def relu_deriv(x):
    """Gradient mask of ReLU.

    Returns a boolean (or boolean array) that is True where ``x`` is strictly
    positive and False elsewhere; in arithmetic these act as 1 and 0.
    """
    is_positive = 0 < x
    return is_positive

In [21]:
# Input features for the walk-vs-stop prediction: one row per training
# example, three values per row.
streetlights = np.array([
    [1, 0, 1],
    [0, 1, 1],
    [0, 0, 1],
    [1, 1, 1],
])
# Outcome to predict for each row above (walking or stopping).
# Kept as a plain 1-D array: `.T` on a 1-D array returns it unchanged, so
# the transpose that used to be applied here had no effect.
walk_vs_stop = np.array([1, 1, 0, 0])
# Every training example needs exactly one label.
assert len(streetlights) == len(walk_vs_stop)
print(walk_vs_stop)

[1 1 0 0]


In [22]:
# Hyperparameters: the learning rate that scales each weight update, and the
# number of units in the single hidden layer.
alpha, hidden_size = 0.2, 4

In [23]:
# Randomly initialise the two weight matrices with values in [-1, 1):
# np.random.random draws floats from [0, 1), and `2 * r - 1` rescales them.
# The input width is read from the data rather than hard-coded, so the first
# matrix always matches the number of columns in `streetlights`.
input_size = streetlights.shape[1]
weights_0_1 = 2 * np.random.random((input_size, hidden_size)) - 1  # input -> hidden
weights_1_2 = 2 * np.random.random((hidden_size, 1)) - 1  # hidden -> single output

print(weights_0_1)
print()
print(weights_1_2)

[[-0.16595599  0.44064899 -0.99977125 -0.39533485]
 [-0.70648822 -0.81532281 -0.62747958 -0.30887855]
 [-0.20646505  0.07763347 -0.16161097  0.370439  ]]

[[-0.5910955 ]
 [ 0.75623487]
 [-0.94522481]
 [ 0.34093502]]


In [24]:
# Train for 60 passes over the data, updating both weight matrices after
# every single example (stochastic gradient descent).
for iteration in range(60):
    # Squared error accumulated over this pass; reset at the start of each one.
    layer_2_error = 0

    for i in range(len(streetlights)):
        # Slicing (rather than indexing) keeps the example as a 1-row matrix.
        layer_0 = streetlights[i:i + 1]
        target = walk_vs_stop[i:i + 1]

        # Forward pass: input -> ReLU hidden layer -> linear output.
        layer_1 = relu(layer_0.dot(weights_0_1))
        layer_2 = layer_1.dot(weights_1_2)

        # Output delta is prediction minus target; its square is this
        # example's contribution to the running error.
        layer_2_delta = layer_2 - target
        layer_2_error += np.sum(layer_2_delta ** 2)

        # Backpropagate through the output weights, gated by the ReLU
        # derivative. This must use weights_1_2 *before* it is updated below.
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu_deriv(layer_1)

        # Gradient step, in place, scaled by the learning rate.
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)

    # Report the accumulated error on every tenth pass.
    if iteration % 10 == 9:
        print("Error: " + str(layer_2_error))

Error: 0.6342311598444467
Error: 0.35838407676317513
Error: 0.0830183113303298
Error: 0.006467054957103705
Error: 0.0003292669000750734
Error: 1.5055622665134859e-05
