 
Implement Error Back propagation algorithm for fully connected neural network to solve
two input XOR classification problem.

In [25]:
import numpy as np
import matplotlib.pyplot as plt

class Layer:
    def __init__(self):
        self.input = None
        self.output = None
        
    def forward(self, input_data):
        pass
        
    def backward(self, output_error, learning_rate):
        pass

In [26]:
class Dense(Layer):
    def __init__(self, input_size, output_size):
        super().__init__()
        self.weights = np.random.randn(input_size, output_size) * 0.01
        self.biases = np.zeros(output_size)
        
    def forward(self, input_data):
        self.input = input_data
        self.output = np.dot(input_data, self.weights) + self.biases
        return self.output
    
    def backward(self, output_error, learning_rate):
     input_error = np.dot(output_error, self.weights.T)
     weights_error = np.dot(self.input.T, output_error)
     self.weights -= learning_rate * weights_error
     self.biases -= learning_rate * np.sum(output_error, axis=0)
     return input_error * learning_rate 

In [27]:
class Activation(Layer):
    def __init__(self, activation, activation_prime):
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime
  
    def forward(self, input_data):
        self.input = input_data
        self.output = self.activation(input_data)
        return self.output

    def backward(self, output_error, learning_rate):
        return np.multiply(output_error, self.activation_prime(self.input))

In [28]:
class MSE(Layer):
    def forward(self, y_true, y_pred):
        self.input = (y_true, y_pred)
        return np.mean(np.power(y_true - y_pred, 2)) / 2

    def backward(self, output_gradient, learning_rate):
        y_true, y_pred = self.input
        return (y_pred - y_true) / np.size(y_true)

In [29]:
class Tanh(Activation):
    def __init__(self):
        def tanh(x):
            return np.tanh(x)
        
        def tanh_prime(x):
            return 1 - np.tanh(x)**2

        super().__init__(tanh, tanh_prime)

In [30]:
class Sigmoid(Activation):
    def __init__(self):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))
        
        def sigmoid_prime(x):
            s = sigmoid(x)
            return s * (1 - s)

        super().__init__(sigmoid, sigmoid_prime)

In [31]:
# Data
X = np.reshape([[-1, -1], [-1, 1], [1, -1], [1, 1]],(4,2,1))
Y = np.reshape([[-1], [1], [1], [-1]],(4,1,1))

# Network architecture
network = [
    Dense(2, 3),
    Tanh(),
    Dense(3, 1),
    Tanh()
]

In [32]:
epochs = 10000
learning_rate = 0.1
loss = MSE()

for e in range(epochs):
    error = 0
    for x, y in zip(X, Y):
        # Forward pass
        output = np.transpose(x)
        for layer in network:
            output = layer.forward(output)

        # Calculating error
        error += loss.forward(y, output)

        # Backward pass
        gradient = loss.backward(y,output)
        for layer in reversed(network):
            gradient = layer.backward(gradient,learning_rate)

    error /= len(X)
    print(f"{e+1}/{epochs}, error={error}")

1/10000, error=0.5523156354270663
2/10000, error=0.5523109478375545
3/10000, error=0.5523084195706731
4/10000, error=0.5523069804191116
5/10000, error=0.5523061020329881
6/10000, error=0.5523055202539413
7/10000, error=0.5523051007747596
8/10000, error=0.5523047738203306
9/10000, error=0.5523045021648956
10/10000, error=0.5523042653194186
11/10000, error=0.5523040516103253
12/10000, error=0.5523038541422101
13/10000, error=0.5523036686944236
14/10000, error=0.552303492595546
15/10000, error=0.5523033241022913
16/10000, error=0.5523031620451262
17/10000, error=0.5523030056190917
18/10000, error=0.5523028542562827
19/10000, error=0.5523027075458375
20/10000, error=0.5523025651825043
21/10000, error=0.5523024269329234
22/10000, error=0.552302292613187
23/10000, error=0.5523021620737314
24/10000, error=0.552302035189079
25/10000, error=0.5523019118508283
26/10000, error=0.5523017919628389
27/10000, error=0.5523016754379135
28/10000, error=0.5523015621955013
29/10000, error=0.55230145216010

In [33]:
#Testing
def predict(input_data):
    output = np.transpose(input_data)
    for layer in network:
        output = layer.forward(output)
    return output


for i in range(len(X)):
    x = X[i]
    expected_output = Y[i]
    output = predict(x)
    print(f"Data={x}, Expected={expected_output}, Output={output}")

Data=[[-1]
 [-1]], Expected=[[-1]], Output=[[0.0187074]]
Data=[[-1]
 [ 1]], Expected=[[1]], Output=[[0.01791694]]
Data=[[ 1]
 [-1]], Expected=[[1]], Output=[[-0.02381521]]
Data=[[1]
 [1]], Expected=[[-1]], Output=[[-0.02460547]]


##B)
Implement Error Back propagation algorithm for fully connected neural network to solve three input XNOR classification problem.

In [35]:
# Define the XNOR input data
X = np.reshape([[-1, -1], [-1, 1], [1, -1], [1, 1]],(4,2,1))
Y = np.reshape([[1], [-1], [-1], [1]],(4,1,1))

# Network architecture
network = [
    Dense(2, 3),
    Tanh(),
    Dense(3, 1),
    Tanh()
]

In [36]:
epochs = 10000
learning_rate = 0.1
loss = MSE()

for e in range(epochs):
    error = 0
    for x, y in zip(X, Y):
        # Forward pass
        output = np.transpose(x)
        for layer in network:
            output = layer.forward(output)

        # Calculating error
        error += loss.forward(y, output)

        # Backward pass
        gradient = loss.backward(y,output)
        for layer in reversed(network):
            gradient = layer.backward(gradient,learning_rate)

    error /= len(X)
    print(f"{e+1}/{epochs}, error={error}")

1/10000, error=0.5523274783604202
2/10000, error=0.5523225393985299
3/10000, error=0.5523197576718704
4/10000, error=0.5523180688659457
5/10000, error=0.5523169467602688
6/10000, error=0.5523161277268595
7/10000, error=0.5523154773609931
8/10000, error=0.5523149255831876
9/10000, error=0.5523144348283829
10/10000, error=0.5523139842955287
11/10000, error=0.552313562042473
12/10000, error=0.5523131609471165
13/10000, error=0.5523127765969322
14/10000, error=0.5523124061557338
15/10000, error=0.5523120477359401
16/10000, error=0.5523117000389932
17/10000, error=0.5523113621423021
18/10000, error=0.5523110333689113
19/10000, error=0.5523107132054856
20/10000, error=0.5523104012494592
21/10000, error=0.5523100971743187
22/10000, error=0.5523098007064609
23/10000, error=0.5523095116095856
24/10000, error=0.5523092296740834
25/10000, error=0.5523089547097679
26/10000, error=0.5523086865408727
27/10000, error=0.5523084250025914
28/10000, error=0.5523081699386709
29/10000, error=0.552307921199

In [37]:
#Testing
def predict(input_data):
    output = np.transpose(input_data)
    for layer in network:
        output = layer.forward(output)
    return output


for i in range(len(X)):
    x = X[i]
    expected_output = Y[i]
    output = predict(x)
    print(f"Data={x}, Expected={expected_output}, Output={output}")

Data=[[-1]
 [-1]], Expected=[[1]], Output=[[-0.01840468]]
Data=[[-1]
 [ 1]], Expected=[[-1]], Output=[[-0.01761788]]
Data=[[ 1]
 [-1]], Expected=[[-1]], Output=[[0.02356558]]
Data=[[1]
 [1]], Expected=[[1]], Output=[[0.02435218]]
