# Backpropagation

#### Synopsis
This is a small script I wrote which stimulates a small neural network ( two input nodes, three hidden layers of one node each, and one output node). This script first creates the structure and assignes random weights and biases, which are later fine tuned through backpropogation. The data fed to the model is a diabetes dataset, where the model tries to predict if a patient has diabetes (0 or 1), given their BMI and Glucose level.

In [129]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import random as rnd
import math

print("setup complete")

setup complete


In [130]:
df = pd.read_csv('diabetes.csv')
x_train, x_test, x2_train, x2_test, y_train, y_test = \
train_test_split(df['Glucose'],df['BMI'], df['Outcome'], test_size=0.2, random_state = 82)



x_train,x_test, x2_train, x2_test, y_train, y_test =\
x_train.values.tolist(), x_test.values.tolist(),x2_train.values.tolist(), x2_test.values.tolist(),\
y_train.values.tolist(), y_test.values.tolist()



In [131]:
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [132]:
#Start with random weights and biases

layers = 4 
np.random.RandomState(seed =4)
w = [np.random.rand() for i in range(layers)]
b = [np.random.rand() for i in range(layers)]
w2 = [np.random.rand() for i in range(layers)]
b2 = [np.random.rand() for i in range(layers)]


In [133]:
#define the sigmoid squish function
def sig(x):
    return 1/(1+(math.e**-x))

#create a function to compute Average Squared Sum Errors 
def ASSE(w,b,data,ans):
    SSE = 0
    for i in range(len(data)):
        SSE += (output(data[i])-ans[i])**2 
    return SSE/len(data)

#create a function to see the output of a single entry
def output (value):
    result = value
    for item in range(len(w)):
        result = sig(result*w[item]+b[item])
    return result

#create a function to see which direction and how much model should move
def des_change(output,desired_output):
    return desired_output-output

#create a function to see the cost of a single entry
def cost(output,desired_output):
    return (output-desired_output)**2

In [134]:
#check how accurate random biases and weights are, the value is the average of how off you would be for every guess
#using the current model, i.e. 0.3 means that for every guess you could expect to be 30% above or below

print(ASSE(w,b,x_test,y_test))

0.4461973072992946


In [135]:
#Define the derivatives relative to the cost function

####
#relevent derivatives:
#
#C/a = 2*(a - y)           #alpha of the output layer
#
#a/z = sig(z)*(1-sig(z))   #zeta of whichever layer you are on i.e. for hidden layer 1 it is z1
#
#z/w = a                   #alpha of the node the weight is attached to
#
#z/b = 1                   #since we are using chain rule, this has no effect on outcomes as 1*x is x
#
#z/a = w                   #weight of alpha node connected to i.e. z2/a2 = w2
####
#derivative of weight
def cd(a,y):
    Cderivative = 2*(a-y)
    return Cderivative
    
    
#derivative of bias
def ad(z):
    Bderivative =sig(z)*(1-sig(z))
    return Bderivative

In [136]:
#to calculate the derivative of any given node in the hidden layer use the derivative of bias function
#which derives the node to its zeta, the zeta to its alpha, and the final alpha to the cost function.
#all that remains is to multiply it by the remaining zeta and alpha derivatives that are inbetween

#formula for remaining derivatives is Wx*(sig(Zx)*(1-sig(Zx))) for every layer between


def forwardProp(x,x2):
    a = [x]
    a2 = [x2]
    z = []
    for i in range(len(w)):
        if i == 0:
            z.append(a[i]*w[i] +b[i] + (a2[i]*w2[i]+b2[i]))
        elif i != 0:
            z.append(a[i]*w[i] +b[i])
        a.append(sig(z[i]))
    return a,a2,z


def backProp(a,a2,z,y):
    wd= []
    bd= []
    w2d= []
    b2d= []
    for i in range(len(w)):
        tempw = ad(z[-1])*cd(a[-1],y)
        tempb = ad(z[-1])*cd(a[-1],y)
        x = i # x = 0
        while x <(len(w)-1): 
            tempw = tempw*a[x]*ad(z[x])*w[x+1]
            tempb = tempb*ad(z[x])*w[x+1]
            if x < 1:
                tempw2 = tempw*a2[x]*ad(z[x])*w2[x+1]
                tempb2 = tempb*ad(z[x])*w2[x+1]
            x += 1
        wd.append(tempw)
        bd.append(tempb)
        w2d.append(tempw2)
        b2d.append(tempb2)
    return wd, bd, w2d, b2d
    


In [137]:
a,a2,z = forwardProp(x_train[0],x2_train[0])

a
#wd, bd, w2d, b2d =backProp(a,z,y_train[0])

[88, 1.0, 0.6808626621720735, 0.7011918613056777, 0.7890851138393173]

In [138]:
totalWD = [0 for i in range(len(w))]
totalBD = [0 for i in range(len(b))]

for i in x_train:
    a,a2,z = forwardProp(x_train[i],x2_train[i])
    wd,bd,w2d,b2d = backProp(a,a2,z,y_train[x_train.index(i)])
    for e in range(len(w)):
        totalWD[e] += wd[e]
        totalBD[e] += bd[e]
print('total weight1 derivative: ',totalWD[0])
print('total weight2 derivative: ',totalWD[1])
print('total Bias1 derivative:   ',totalBD[0])
print('total Bias2 derivative:   ',totalBD[1])
print()
print('total average cost:     ', ASSE(w,b,x_train,y_train))

change = sum(des_change(output(x_train[i]),y_train[i]) for i in range(len(x_train)))/len(x_train)
print('desired average change: ',change)



total weight1 derivative:  0.0
total weight2 derivative:  0.556589269849143
total Bias1 derivative:    0.0
total Bias2 derivative:    0.8174765643243367

total average cost:      0.41455042964071603
desired average change:  -0.4291480606631066


In [139]:
def train (totalWD,totalBD,learning_rate,change):
    
    if abs(max(totalWD) > abs(min(totalWD))):
        change_weight_index = totalWD.index(max(totalWD)) 
    else:
        change_weight_index = totalWD.index(min(totalWD))
    if abs(max(totalBD)) > abs(min(totalBD)):
        change_bias_index = totalBD.index(max(totalBD))
    else:
         change_bias_index = totalBD.index(min(totalBD))
    
    
    print('weight change: ',change*learning_rate*(abs(totalWD[change_weight_index])**0.5))
    print('bias change: ', change*learning_rate*(abs(totalBD[change_bias_index])**0.5))
    w[change_weight_index] += change*learning_rate*(abs(totalWD[change_weight_index])**0.5)
    b[change_bias_index] += change*learning_rate*(abs(totalBD[change_bias_index])**0.5)

In [140]:
def epochs(learning_rate,iterations):
    x=0
    while x <iterations: 
        totalWD = [0 for i in range(len(w))]
        totalBD = [0 for i in range(len(b))]
        
        for i in x_train:
            a,a2,z = forwardProp(x_train[i],x2_train[i])
            wd,bd,w2d,b2d = backProp(a,a2,z,y_train[x_train.index(i)])
            for e in range(len(w)):
                totalWD[e] += wd[e]
                totalBD[e] += bd[e]
        for item in range(len(w)):
            print('w',item,': ',w[item])
            print('b',item,': ',b[item])
            if item == 0 :
                print('w2 0: ',w2[0])
                print('b2 0: ',b2[0])
            print('total weight',item,'derivative:  ',totalWD[item])
            print('total Bias',item,'derivative:    ',totalBD[item])
            print()
        print()
        print('total average cost:     ', ASSE(w,b,x_train,y_train))
        
        change = sum(des_change(output(x_train[i]),y_train[i]) for i in range(len(x_train)))/len(x_train)
        print('desired average change: ',change)
        print('iteration:',x)
        train(totalWD,totalBD,learning_rate,change)
        x +=1
        print('\n'*3)
        print('--------------------------------------')




In [141]:
epochs(1,100)

w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.556589269849143
total Bias 1 derivative:     0.8174765643243367

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   9.983491796834716
total Bias 2 derivative:     14.663003791374948

w 3 :  0.8778700177184595
b 3 :  0.7038642099372739
total weight 3 derivative:   79.71929353230342
total Bias 3 derivative:     79.71929353230342


total average cost:      0.41455042964071603
desired average change:  -0.4291480606631066
iteration: 0
weight change:  -3.8316768486730304
bias change:  -3.8316768486730304




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0

b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.020764548457154627
total Bias 1 derivative:     0.0304974110210591

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   0.37245184270828974
total Bias 2 derivative:     0.5470293252975632

w 3 :  -0.23609514148652827
b 3 :  -0.4101009492677135
total weight 3 derivative:   -11.058450285525671
total Bias 3 derivative:     -11.058450285525671


total average cost:      0.23038152552496033
desired average change:  -3.506974805073278e-07
iteration: 14
weight change:  -1.166218108735308e-06
bias change:  -1.166218108735308e-06




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0



total weight 3 derivative:   -11.058544785919691
total Bias 3 derivative:     -11.058544785919691


total average cost:      0.23038152552406538
desired average change:  -1.9643714812577812e-14
iteration: 28
weight change:  -6.532397606092395e-14
bias change:  -6.532397606092395e-14




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.020764804601280162
total Bias 1 derivative:     0.030497787226335178

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   0.372456437142496
total Bias 2 derivative:     0.5470360732578473

w 3 :  -0.2360960363051732
b 3 :  -0.41010184408635836
total weight 3 derivative:   -11.05854478592672
total Bias 3 derivative:     -11.05854478592672


total average cost:      0.23038152552406643
de

w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.02076480460127553
total Bias 1 derivative:     0.030497787226328316

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   0.37245643714241583
total Bias 2 derivative:     0.5470360732577262

w 3 :  -0.23609603630515796
b 3 :  -0.4101018440863431
total weight 3 derivative:   -11.058544785925129
total Bias 3 derivative:     -11.058544785925129


total average cost:      0.23038152552406693
desired average change:  3.6163616437301518e-19
iteration: 43
weight change:  1.202599018040465e-18
bias change:  1.202599018040465e-18




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bia

total weight 2 derivative:   0.37245643714241583
total Bias 2 derivative:     0.5470360732577262

w 3 :  -0.23609603630515796
b 3 :  -0.4101018440863431
total weight 3 derivative:   -11.058544785925129
total Bias 3 derivative:     -11.058544785925129


total average cost:      0.23038152552406693
desired average change:  3.6163616437301518e-19
iteration: 57
weight change:  1.202599018040465e-18
bias change:  1.202599018040465e-18




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.02076480460127553
total Bias 1 derivative:     0.030497787226328316

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   0.37245643714241583
total Bias 2 derivative:     0.5470360732577262

w 3 :  -0.23609603630515796
b 3 :  -0.41010184408

w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.02076480460127553
total Bias 1 derivative:     0.030497787226328316

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   0.37245643714241583
total Bias 2 derivative:     0.5470360732577262

w 3 :  -0.23609603630515796
b 3 :  -0.4101018440863431
total weight 3 derivative:   -11.058544785925129
total Bias 3 derivative:     -11.058544785925129


total average cost:      0.23038152552406693
desired average change:  3.6163616437301518e-19
iteration: 69
weight change:  1.202599018040465e-18
bias change:  1.202599018040465e-18




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bia

b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.02076480460127553
total Bias 1 derivative:     0.030497787226328316

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   0.37245643714241583
total Bias 2 derivative:     0.5470360732577262

w 3 :  -0.23609603630515796
b 3 :  -0.4101018440863431
total weight 3 derivative:   -11.058544785925129
total Bias 3 derivative:     -11.058544785925129


total average cost:      0.23038152552406693
desired average change:  3.6163616437301518e-19
iteration: 84
weight change:  1.202599018040465e-18
bias change:  1.202599018040465e-18




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w

b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
total weight 1 derivative:   0.02076480460127553
total Bias 1 derivative:     0.030497787226328316

w 2 :  0.2565755258520438
b 2 :  0.6782871597705876
total weight 2 derivative:   0.37245643714241583
total Bias 2 derivative:     0.5470360732577262

w 3 :  -0.23609603630515796
b 3 :  -0.4101018440863431
total weight 3 derivative:   -11.058544785925129
total Bias 3 derivative:     -11.058544785925129


total average cost:      0.23038152552406693
desired average change:  3.6163616437301518e-19
iteration: 98
weight change:  1.202599018040465e-18
bias change:  1.202599018040465e-18




--------------------------------------
w 0 :  0.7109659358673398
b 0 :  0.6248865003504928
w2 0:  0.9268613402114424
b2 0:  0.40293420081441755
total weight 0 derivative:   0.0
total Bias 0 derivative:     0.0

w 1 :  0.5480285449209549
b 1 :  0.2097105338295635
t