# Madaline Learning for XOR Neural Network

In [1]:
import numpy as np ;

- Set up inputs and outputs of XOR gate for training the multi-layer neural network 

In [2]:
inputs = [[0,0], [0,1], [1,0], [1,1]] ;
print('Inputs: ', inputs) ;

Inputs:  [[0, 0], [0, 1], [1, 0], [1, 1]]


In [3]:
outputs = np.zeros(len(inputs)) ;
for i in range(len(inputs)):
    if inputs[i][0] == inputs[i][1]:
        outputs[i] = 0 ;
    else:
        outputs[i] = 1 ;
        
print('Target outputs: ', outputs) ;

Target outputs:  [0. 1. 1. 0.]


- Initialize weights to random values and select a learning rate

In [4]:
# A hidden layer must be introduced for classifying the targets of XOR gate.
# Assume that hidden layer has two neurons that feed forwards an output layer of 1 neuron
rate = 0.1 ;
tolerance = 0.0001 ;
np.random.seed(100) ;
hiddenLayerWeights = np.random.randn(2,2) ;
outputLayerWeights = np.random.randn(2) ;
print('Initial weights') ;
print('-----------------------------------------')
print('Hidden Layer: \n', hiddenLayerWeights) ;
print('Output Layer: ', outputLayerWeights) ;

Initial weights
-----------------------------------------
Hidden Layer: 
 [[-1.74976547  0.3426804 ]
 [ 1.1530358  -0.25243604]]
Output Layer:  [0.98132079 0.51421884]


- Define the activation function - sigmoid function

In [5]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x)) ;

def derv_sigmoid(x):
    return x*(1 - x) ; # 'x' should already be sigmoided first !

- Train the weights accordingly and propagate the error backwards to adjust the weights 

-- Based on epochs

In [None]:
epochs = 100 ;
du = np.zeros(2) ;
for e in range(epochs):
    print('Epoch', (e+1)) ;
    print('----------------------------------------------------------') ;
    for i in range(len(inputs)):
        # Feed forward
        u1 = sigmoid(np.dot(inputs[i], hiddenLayerWeights[0][:])) ;
        u2 = sigmoid(np.dot(inputs[i], hiddenLayerWeights[1][:])) ;
        u13 = np.dot(u1, outputLayerWeights[0]) ;
        u23 = np.dot(u2, outputLayerWeights[1]) ;
        u3 = sigmoid(u13 + u23) ;
        error = outputs[i] - u3 ;
        # Error backpropagation
        derror = error * derv_sigmoid(u3) ;
        du[0] = derror * outputLayerWeights[0] * derv_sigmoid(u1) ; 
        du[1] = derror * outputLayerWeights[1] * derv_sigmoid(u2) ; 
        outputLayerWeights[0] += rate*u1*derror ;
        outputLayerWeights[1] += rate*u2*derror ;
        for j in range(hiddenLayerWeights.shape[0]):
            for k in range(hiddenLayerWeights.shape[1]):
                hiddenLayerWeights[j][k] += inputs[i][k] * rate * du[j] ; 
        print('Hidden Layer: \n', hiddenLayerWeights) ;
        print('Output Layer: ', outputLayerWeights) ;
    print('------------------------------------------------------') ;


 -- Based on mean square error (MSE)

In [6]:
du = np.zeros(2) ;
epoch = 0 ;
err = np.zeros(4) ;
prev_mse = 0 ;
switch = True ;
while switch:
    print('Epoch', (epoch+1)) ;
    print('----------------------------------------------------------') ;
    for i in range(len(inputs)):
        # Feed forward
        u1 = sigmoid(np.dot(inputs[i], hiddenLayerWeights[0][:])) ;
        u2 = sigmoid(np.dot(inputs[i], hiddenLayerWeights[1][:])) ;
        u13 = np.dot(u1, outputLayerWeights[0]) ;
        u23 = np.dot(u2, outputLayerWeights[1]) ;
        u3 = sigmoid(u13 + u23) ;
        error = outputs[i] - u3 ;
        err[i] = np.square(error) ;
        # Error backpropagation
        derror = error * derv_sigmoid(u3) ;
        du[0] = derror * outputLayerWeights[0] * derv_sigmoid(u1) ; 
        du[1] = derror * outputLayerWeights[1] * derv_sigmoid(u2) ; 
        outputLayerWeights[0] += rate*u1*derror ;
        outputLayerWeights[1] += rate*u2*derror ;
        for j in range(hiddenLayerWeights.shape[0]):
            for k in range(hiddenLayerWeights.shape[1]):
                hiddenLayerWeights[j][k] += inputs[i][k] * rate * du[j] ; 
        print('Hidden Layer: \n', hiddenLayerWeights) ;
        print('Output Layer: ', outputLayerWeights) ;
    mse = np.mean(err) ;
    print('MSE: ', mse) ;
    if(np.abs(mse - prev_mse) < tolerance):
        switch = False ;
    prev_mse = mse ;
    print('------------------------------------------------------') ;
    epoch += 1 ;


Epoch 1
----------------------------------------------------------
Hidden Layer: 
 [[-1.74976547  0.3426804 ]
 [ 1.1530358  -0.25243604]]
Output Layer:  [0.9739207  0.50681875]
Hidden Layer: 
 [[-1.74976547  0.34426344]
 [ 1.1530358  -0.25160119]]
Output Layer:  [0.97783588 0.50974572]
Hidden Layer: 
 [[-1.7487016   0.34426344]
 [ 1.15383755 -0.25160119]]
Output Layer:  [0.97911298 0.51630095]
Hidden Layer: 
 [[-1.75098355  0.34198148]
 [ 1.15227652 -0.25316222]]
Output Layer:  [0.97621016 0.50582426]
MSE:  0.27499264993802286
------------------------------------------------------
Epoch 2
----------------------------------------------------------
Hidden Layer: 
 [[-1.75098355  0.34198148]
 [ 1.15227652 -0.25316222]]
Output Layer:  [0.96880838 0.49842248]
Hidden Layer: 
 [[-1.75098355  0.343568  ]
 [ 1.15227652 -0.25233522]]
Output Layer:  [0.97275128 0.50136982]
Hidden Layer: 
 [[-1.74991921  0.343568  ]
 [ 1.15307055 -0.25233522]]
Output Layer:  [0.97403539 0.50796667]
Hidden Layer: 


 - Predict outputs from neural network after weight adjustment

In [7]:
threshold = 0.5 ;
predicted_outputs = np.zeros(len(inputs)) ;
for i in range(len(inputs)):
    u1 = sigmoid(np.dot(inputs[i], hiddenLayerWeights[0][:])) ;
    u2 = sigmoid(np.dot(inputs[i], hiddenLayerWeights[0][:])) ;
    predicted_outputs[i] = sigmoid(np.dot(u1, outputLayerWeights[0]) + np.dot(u2, outputLayerWeights[1])) ;
    if predicted_outputs[i] > threshold:
        predicted_outputs[i] = 1 ;
    else:
        predicted_outputs[i] = 0 ;
    
print('Predicted outputs: ', outputs) ;

Predicted outputs:  [0. 1. 1. 0.]
