In [1]:
import numpy as np

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [3]:
noOfInputs=5 #constant number of inputs
noOfLayers=2 #don't count the output layer as a layer
layerSizes=[2] #don't count the output layer
noOfOutputs=3 #constant number of outputs
initRandWeightMin=-1
initRandWeightMax=1
labels=np.zeros(10)
activationFunction= sigmoid #or np.tanh
lr = 0.001 # learning rate
useBias = True
weights, bias = [], []
neuronsOutput, errors = [], []

In [4]:
def gRandomNumbers(size):
    return np.random.uniform(low=initRandWeightMin,high=initRandWeightMax,size=size)

In [28]:
def initWeights(layerSizes):
    weights=[]
    weights.append(gRandomNumbers((noOfInputs,layerSizes[0])))
    for i, l in enumerate(layerSizes[:-1]):
        weights.append(gRandomNumbers((l))) #weights between last layer and the final output neurons
    weights.append(gRandomNumbers((layerSizes[-1],noOfOutputs))) #weights between last layer and the final output neurons
    return weights

In [5]:
def initBias():
    bias = []
    for i, l in enumerate(layerSizes):
        bias.append(gRandomNumbers((l))) #bias between last layer and the final output neurons
    bias.append(gRandomNumbers((noOfOutputs))) #bias between last layer and the final output neurons
    return bias

In [30]:
def initNeuronsOutput():
    neuronsOutput=[]
    neuronsOutput.append(np.random.random(noOfInputs))
    for l in layerSizes:
        neuronsOutput.append(np.random.random(l))
    neuronsOutput.append(np.random.random(noOfOutputs))
    return neuronsOutput

In [6]:
def initErrors():
    errors=[]
    for l in layerSizes:
        errors.append(np.random.random(l))
    errors.append(np.random.random(noOfOutputs))
    return errors

- A 'layer' is a layer of neurons
- A 'connection' is the set of weights between two layers

In [32]:
def feedForward():
    # instead of multiplying each neuron weights and inputs 
    # we can just multiply weights[i].T x inputs[j] 
    # and produce the next neuronsOutput directly 
    # for example (5, 2).T @ (5,) = (2,) -> activation((2,)) -> (2,)
    for j, connection in enumerate(weights):
        z = (connection.T @ neuronsOutput[j]) + bias[j] if useBias else 0
        neuronsOutput[j+1] = activationFunction(z)

In [33]:
def finalError(label):
    return (label-neuronsOutput[-1])*neuronsOutput[-1]*(1-neuronsOutput[-1])

In [34]:
def backPropagation(label):
    k = [i == label for i in range(noOfOutputs)]
    errors[-1]= (k-neuronsOutput[-1])*neuronsOutput[-1]*(1-neuronsOutput[-1])

    for j in range(len(weights)-1,-1,-1):
        for i in range(errors[j-1].shape[0]):
            s = (errors[j].dot(weights[j][i,:]))+ bias[j][i] if (useBias and j > 0) else 0
            errors[j-1][i]=s*(1-neuronsOutput[j-1][i])

I LOVE obfuscated code

In [35]:
def updateWeights():
    for i,connection in enumerate(weights): #Loop over every connection between every two layers
        for j in range(connection.shape[1]): # loop over every layer's neurons
            for k in range(connection.shape[0]): # loop over all the incoming weights to this neuron
                weights[i][k,j]=weights[i][k,j]+lr*neuronsOutput[i][k]*errors[i][j]
                # Rule: weight=weight+lr*error*input
                if useBias:
                    t = bias[i][j]
                    bias[i][j]=bias[i][j]+lr*errors[i][j]
                    print(f"before: {t}, after: {bias[i][j]}")

In [36]:
weights = initWeights(layerSizes)
weights

[array([[ 0.15836827,  0.01956426],
        [ 0.29136972, -0.19391232],
        [ 0.83110243, -0.80753268],
        [ 0.10412579, -0.04586494],
        [ 0.33609077, -0.25889816]]),
 array([[ 0.16258447, -0.92044583,  0.9623642 ],
        [ 0.28911247, -0.98023284,  0.17523177]])]

In [37]:
bias = initBias()
bias

[array([-0.24391834,  0.47152754]),
 array([ 0.30303463,  0.01407052, -0.90866846])]

In [38]:
neuronsOutput = initNeuronsOutput()
neuronsOutput

[array([0.08554847, 0.81772452, 0.78237309, 0.9064757 , 0.59121453]),
 array([0.86543285, 0.27840044]),
 array([0.69475773, 0.64307104, 0.56151671])]

In [39]:
errors = initErrors()
errors

[array([0.94932808, 0.26333293]), array([0.08245416, 0.11186971, 0.15893321])]

In [40]:
feedForward()

In [41]:
backPropagation(1)

In [42]:
weights

[array([[ 0.15836827,  0.01956426],
        [ 0.29136972, -0.19391232],
        [ 0.83110243, -0.80753268],
        [ 0.10412579, -0.04586494],
        [ 0.33609077, -0.25889816]]),
 array([[ 0.16258447, -0.92044583,  0.9623642 ],
        [ 0.28911247, -0.98023284,  0.17523177]])]

In [43]:
updateWeights() # it works but the error is very small so the difference is small

before: -0.24391833713073185, after: -0.2438848945216584
before: -0.2438848945216584, after: -0.24385145191258495
before: -0.24385145191258495, after: -0.2438180093035115
before: -0.2438180093035115, after: -0.24378456669443804
before: -0.24378456669443804, after: -0.2437511240853646
before: 0.47152754324857105, after: 0.4714931027378853
before: 0.4714931027378853, after: 0.47145866222719957
before: 0.47145866222719957, after: 0.4714242217165138
before: 0.4714242217165138, after: 0.4713897812058281
before: 0.4713897812058281, after: 0.47135534069514234
before: 0.30303463084062265, after: 0.30303463084062265
before: 0.30303463084062265, after: 0.30303463084062265
before: 0.014070517106217206, after: 0.014070517106217206
before: 0.014070517106217206, after: 0.014070517106217206
before: -0.908668464048201, after: -0.908668464048201
before: -0.908668464048201, after: -0.908668464048201


In [44]:
weights

[array([[ 0.15837113,  0.01956131],
        [ 0.29139706, -0.19394049],
        [ 0.83112859, -0.80755962],
        [ 0.10415611, -0.04589615],
        [ 0.33611054, -0.25891852]]),
 array([[ 0.16258447, -0.92044583,  0.9623642 ],
        [ 0.28911247, -0.98023284,  0.17523177]])]

Don't take any function for granted, not showing errors doesn't neccessarly mean working correctly

In [45]:
#TODO: loop over some samples

In [46]:
a = np.matrix([[1,2],[2,3]])
b = np.matrix([[3,4],[4,5]])
(a @ b) == (a.T  @ b)

matrix([[ True,  True],
        [ True,  True]])