In [21]:
# import necessary libraries
import numpy as np

In [22]:
class BaseLayer:
    def __init__(self):
        self.input = None
        self.output = None

    def forward(self, input):
        pass

    def backward(self):
        pass

In [23]:
class DenseLayer(BaseLayer):
    def __init__(self, input_sizes, output_sizes):
        super().__init__()
        self.weights = np.random.randn(output_sizes, input_sizes)
        self.bias = np.random.randn(output_sizes, 1)
        
    def forward(self, input):
        self.input = input
        return np.dot(self.weights, self.input) + self.bias

    def backward(self, outputGradient, learningRate):
        inputGradient = np.dot(self.weights.transpose(), outputGradient)
        weightGradient = np.dot(outputGradient, self.input.transpose())
        self.weights = self.weights - learningRate*weightGradient
        self.bias = self.bias - learningRate*outputGradient
        return inputGradient

In [24]:
class ActivationLayer(BaseLayer):
    def __init__(self, activation, activation_prime):
        super().__init__()
        self.activation = activation
        self.activation_prime = activation_prime
    
    def forward(self, input):
        self.input = input
        return self.activation(input)
    
    def backward(self, outputGradient, learning_rate):
        return np.multiply(outputGradient, self.activation_prime(self.input))


In [32]:
class Tanh(ActivationLayer):
    def __init__(self):
        activation = lambda x : np.tanh(x)
        activation_prime = lambda x : 1 - np.tanh(x)**2
        super().__init__(activation, activation_prime)

In [26]:
def meanSquaredError(yPred, yTrue):
    return np.mean(np.power(yPred - yTrue, 2))

def meanSquaredErrorPrime(yPred , yTrue):
    return 2 * (yPred - yTrue) / np.size(yTrue)

### XOR Problem

In [36]:
X = np.reshape([[0, 0], [0, 1], [1, 0], [1, 1]], (4, 2, 1))
Y = np.reshape([[0], [1], [1], [0]], (4, 1, 1))

network = [
    DenseLayer(2, 3),
    Tanh(),
    DenseLayer(3, 1),
    Tanh()
]

# train
epochs = 10
learning_rate = 0.001
verbose = True
for e in range(epochs):
    error = 0
    for x, y in zip(X, Y):
        # forward
        output = x 
        for layer in network:
            output = layer.forward(output)

        # error
        error += meanSquaredError(output, y)

        # backward
        grad = meanSquaredErrorPrime(output, y)
        for layer in reversed(network):
            grad = layer.backward(grad, learning_rate)

    error /= len(x)
    if verbose:
        print(f"{e + 1}/{epochs}, error={error}")

1/10, error=4.354916631743853
2/10, error=4.347501706860016
3/10, error=4.339938987060156
4/10, error=4.332224601928815
5/10, error=4.324354566379513
6/10, error=4.316324777455687
7/10, error=4.308131011096666
8/10, error=4.2997689188756265
9/10, error=4.291234024717503
10/10, error=4.282521721605986
