In [1]:
import numpy as np 
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt 

In [5]:
def Sigmoid(Z):
    ''' the sigmoid function'''
    return 1/(1+np.exp(-Z))

def dSigmoid(Z):
    ''' the derivative of sigmoid function'''
    s = 1/(1+np.exp(-Z))
    dZ = s * (1-s)
    return dZ

### Initialize neural network 

In [2]:
nn = {
'x1': 70, 
'x2': 16 ,
'w1': 0.15,
'w2': 0.20,
'w3': 0.25,
'w4': 0.30,
'w5': 0.40,
'w6': 0.45,
'w7': 0.50,
'w8': 0.55, 
'target1': 1.0,
'target2': 0.0,
'eta' : 0.1
}

<img src="resources/network.png" width="350">

### The Forward pass

Calculate the weighted sum and output for both hidden layers and both outputs. 

$net_{h1} = w_1 * x_1 + w_2 * x_2$

$out_{h1} = $
$\frac{1}{1 + e^{-net_{h1}}}$

$net_{h2} = w_3 * x_1 + w_4 * x_2$

$out_{h2} = $
$\frac{1}{1 + e^{-net_{h2}}}$

$net_{o1} = w_5 * out_{h1} + w_6 * out_{h2}$

$out_{o1} = $
$\frac{1}{1 + e^{-net_{o1}}}$

$net_{o2} = w_7 * out_{h1} + w_8 * out_{h2}$

$out_{o2} = $
$\frac{1}{1 + e^{-net_{o2}}}$


In [3]:
def forward(nn):
    nn['net_h1'] = nn['w1'] * nn['x1'] + nn['w2'] * nn['x2']
    nn['net_h2'] = nn['w3'] * nn['x1'] + nn['w4'] * nn['x2']
    nn['out_h1'] = Sigmoid(nn['net_h1'])
    nn['out_h2'] = Sigmoid(nn['net_h2'])
    nn['net_o1'] = nn['w5'] * nn['out_h1']  + nn['w6'] * nn['out_h2']
    nn['net_o2'] = nn['w7'] * nn['out_h1']  + nn['w8'] * nn['out_h2']
    nn['out_o1'] = Sigmoid(nn['net_o1'])
    nn['out_o2'] = Sigmoid(nn['net_o2']) 
    return nn

In [4]:
forward(nn)

NameError: name 'Sigmoid' is not defined

### Error calculation 

The squared error function

$E_{total} = \sum\frac{1}{2}(target - output)^2$

$E_{o1} = \frac{1}{2}(target_{o1} - output_{o1})^2$

$E_{o2} = \frac{1}{2}(target_{o2} - output_{o2})^2$

$E_{total} = E_{o1} + E_{o2}$

In [6]:
def calc_error(nn):
    nn['err1'] = (1/2) * (nn['target1'] - nn['out_o1'])**2
    nn['err2'] = (1/2) * (nn['target2'] - nn['out_o2'])**2
    nn['total_error'] = nn['err1'] + nn['err2']
    return nn

In [7]:
calc_error(nn)

KeyError: 'out_o1'

### The backwards pass

output weight updates: 

$\frac{\partial E_{total}}{\partial w_5} = \frac{\partial E_{total}}{\partial out_{o1}} * \frac{\partial out_{o1}}{\partial net_{o1}} * \frac{\partial net_{o1}}{\partial w_5}$

$\frac{\partial E_{total}}{\partial w_6} = \frac{\partial E_{total}}{\partial out_{o1}} * \frac{\partial out_{o1}}{\partial net_{o1}} * \frac{\partial net_{o1}}{\partial w_6}$

$\frac{\partial E_{total}}{\partial w_7} = \frac{\partial E_{total}}{\partial out_{o2}} * \frac{\partial out_{o2}}{\partial net_{o2}} * \frac{\partial net_{o2}}{\partial w_7}$

$\frac{\partial E_{total}}{\partial w_8} = \frac{\partial E_{total}}{\partial out_{o2}} * \frac{\partial out_{o2}}{\partial net_{o2}} * \frac{\partial net_{o2}}{\partial w_8}$


hidden weight updates: 

$\frac{\partial E_{total}}{\partial w_1} = \frac{\partial E_{total}}{\partial out_{h1}} * \frac{\partial out_{h1}}{\partial net_{h1}} * \frac{\partial net_{h1}}{\partial w_1}$

$\frac{\partial E_{total}}{\partial w_2} = \frac{\partial E_{total}}{\partial out_{h1}} * \frac{\partial out_{h1}}{\partial net_{h1}} * \frac{\partial net_{h1}}{\partial w_2}$

$\frac{\partial E_{total}}{\partial w_3} = \frac{\partial E_{total}}{\partial out_{h2}} * \frac{\partial out_{h2}}{\partial net_{h2}} * \frac{\partial net_{h2}}{\partial w_3}$

$\frac{\partial E_{total}}{\partial w_4} = \frac{\partial E_{total}}{\partial out_{h2}} * \frac{\partial out_{h2}}{\partial net_{h2}} * \frac{\partial net_{h2}}{\partial w_4}$

In [8]:
def backward(nn):
    ############ output weights 
    
    ### w5 
    nn['dErr_outo1'] = -(nn['target1'] - nn['out_o1'])
    nn['douto1_neto1'] = dSigmoid(nn['out_o1'])
    nn['dneto1_w5'] = nn['out_h1']
    w5 = nn['w5'] - nn['eta'] * nn['dErr_outo1'] * nn['douto1_neto1'] * nn['dneto1_w5']
    
    ### w6
    nn['dneto1_w6'] = nn['out_h2']
    w6 = nn['w6'] - nn['eta'] * nn['dErr_outo1'] * nn['douto1_neto1'] * nn['dneto1_w6']

    
    ### w7 
    nn['dErr_outo2'] = -(nn['target2'] - nn['out_o2'])
    nn['douto2_neto2'] = dSigmoid(nn['out_o2'])
    nn['dneto2_w7'] = nn['out_h1']
    w7 = nn['w7'] - nn['eta'] * nn['dErr_outo2'] * nn['douto2_neto2'] * nn['dneto2_w7']
    
    ### w8
    nn['dneto2_w8'] = nn['out_h2']
    w8 = nn['w8'] - nn['eta'] * nn['dErr_outo2'] * nn['douto2_neto2'] * nn['dneto2_w8']
    
    ############ hidden weights 
    
    ### w1 
    nn['dErr_neto1'] = nn['dErr_outo1'] * nn['douto1_neto1']
    nn['dneto1_outh1'] = nn['w5']
    nn['dErr1_outh1'] = nn['dErr_neto1'] * nn['dneto1_outh1']
    nn['dErr_neto2'] = nn['dErr_outo2'] * nn['douto2_neto2']
    nn['dneto2_outh1'] = nn['w7']
    nn['dErr2_outh1'] = nn['dErr_neto2'] * nn['dneto2_outh1']
    nn['dErr_outh1'] = nn['dErr1_outh1'] + nn['dErr2_outh1']
    nn['douth1_neth1'] = dSigmoid(nn['out_h1'])
    nn['dneth1_w1'] = nn['x1']
    w1 = nn['w1'] - nn['eta'] * nn['dErr_outh1'] * nn['douth1_neth1'] * nn['dneth1_w1']
    
    ### w2
    nn['dneth1_w2'] = nn['x2']
    w2 = nn['w2'] - nn['eta'] * nn['dErr_outh1'] * nn['douth1_neth1'] * nn['dneth1_w2']
    
    ### w3
    nn['dneto1_outh2'] = nn['w6']
    nn['dErr1_outh2'] = nn['dErr_neto1'] * nn['dneto1_outh2']
    nn['dneto2_outh2'] = nn['w8'] 
    nn['dErr2_outh2'] = nn['dErr_neto2'] * nn['dneto2_outh2']
    nn['dErr_outh2'] = nn['dErr1_outh2'] + nn['dErr2_outh2']
    nn['douth2_neth2'] = dSigmoid(nn['out_h2'])
    nn['dneth2_w3'] = nn['x1']
    w3 = nn['w3'] - nn['eta'] * nn['dErr_outh2'] * nn['douth2_neth2'] * nn['dneth2_w3']
    
    ### w4
    nn['dneth2_w4'] = nn['x2']
    w4 = nn['w4'] - nn['eta'] * nn['dErr_outh2'] * nn['douth2_neth2'] * nn['dneth2_w4']
    
    ### update all weights simultaneously
    nn['w1'] = w1
    nn['w2'] = w2
    nn['w3'] = w3
    nn['w4'] = w4
    nn['w5'] = w5
    nn['w6'] = w6
    nn['w7'] = w7
    nn['w8'] = w8

    return nn

In [9]:
backward(nn)

{'x1': 70,
 'x2': 16,
 'w1': 0.07510107754711555,
 'w2': 0.18288024629648356,
 'w3': 0.16852474274223866,
 'w4': 0.2813770840553688,
 'w5': 0.4066375399108027,
 'w6': 0.45663754735971357,
 'w7': 0.48380575740490334,
 'w8': 0.5338057392310812,
 'target1': 1.0,
 'target2': 0.0,
 'eta': 0.1,
 'net_h1': 13.7,
 'net_h2': 22.3,
 'out_h1': 0.9999988775548947,
 'out_h2': 0.9999999997933511,
 'net_o1': 0.849999550928966,
 'net_o2': 1.0499994386637905,
 'out_o1': 0.7005670482710666,
 'out_o2': 0.7407747913901797,
 'err1': 0.044830046290550866,
 'err2': 0.27437364577958206,
 'total_error': 0.31920369207013294,
 'dErr_outo1': -0.29943295172893336,
 'douto1_neto1': 0.22167057175103325,
 'dneto1_w5': 0.9999988775548947,
 'dneto1_w6': 0.9999999997933511,
 'dErr_outo2': 0.7407747913901797,
 'douto2_neto2': 0.2186124711651475,
 'dneto2_w7': 0.9999988775548947,
 'dneto2_w8': 0.9999999997933511,
 'dErr_neto1': -0.06637547361085219,
 'dneto1_outh1': 0.4,
 'dErr1_outh1': -0.02655018944434088,
 'dErr_neto2'

In [10]:
def run_network(nn, niters = 10):
    for i in range(niters):
        nn = forward(nn)
        print('output1: ' + str(nn['out_o1']) + ' target1: ' + str(nn['target1']))
        print('output2: ' + str(nn['out_o2']) + ' target2: ' + str(nn['target2']))
        nn = calc_error(nn)
        print('Error: ' + str(nn['total_error']))

        nn = backward(nn)

In [11]:
run_network(nn, niters = 200)

output1: 0.703320758742449 target1: 1.0
output2: 0.7344807324321174 target2: 0.0
Error: 0.3137402592535879
output1: 0.7019869335123705 target1: 1.0
output2: 0.7237628941543841 target2: 0.0
Error: 0.3063222573760453
output1: 0.6278201712353402 target1: 1.0
output2: 0.636236879654913 target2: 0.0
Error: 0.2716575959861559
output1: 0.6049175281710153 target1: 1.0
output2: 0.606901912957801 target2: 0.0
Error: 0.2622100457491694
output1: 0.545001641014132 target1: 1.0
output2: 0.5439838488920203 target2: 0.0
Error: 0.25147096726760465
output1: 0.5204496440443604 target1: 1.0
output2: 0.519629625575949 target2: 0.0
Error: 0.24999174583669082
output1: 0.5161290181996147 target1: 1.0
output2: 0.5153565411188842 target2: 0.0
Error: 0.24986174575124454
output1: 0.5148016411832158 target1: 1.0
output2: 0.5140027671772621 target2: 0.0
Error: 0.2498081460321918
output1: 0.5146221451251878 target1: 1.0
output2: 0.5137521690385999 target2: 0.0
Error: 0.24976647659737017
output1: 0.5150711135351693 t