In [45]:
import numpy as np

class ANN:
    """ 
    Assume that the neurons have a
    sigmoid activation function,
    perform a forward pass and a
    backward pass on the network.
    Assume that the actual output of
    y is 1 and learning rate is 0.9.
    structure: 3 ip, 2 hidden, 1 op
    input layer neurons: x1, x2, x3
    x1=1, x2=0, x3=1
    w14 (weight of connection between x1 and h4) = 0.2
    w15 (weight of connection between x1 and h5) = -0.3
    w24 (weight of connection between x2 and h4) = 0.4
    w25 (weight of connection between x2 and h5) = 0.1
    w34 (weight of connection between x3 and h4) = -0.5
    w35 (weight of connection between x3 and h5) = 0.2
    hidden layer neurons: h4, h5
    theta 4 (bias of h4) = -0.4
    theta 5 (bias of h5) = 0.2
    output layer neuron: o6
    w46 (weight of connection between h4 and o6) = -0.3
    w56 (weight of connection between h5 and o6) = -0.2
    theta 6 (bias of o6) = 0.1
    
    working of the network:
    h4 = (0.2 * 1) + (0.4 * 0) + (-0.5 * 1) + (-0.4) = -0.7
    o4 = sigmoid(-0.7) = 0.3318
    h5 = (-0.3 * 1) + (0.1 * 0) + (0.2 * 1) + (0.2) = 0.1
    o5 = sigmoid(0.1) = 0.5249
    o6 = (w46 * H 4) + (w56 * H 5) + theta 6
    o6 = (-0.3 * 0.332) + (-0.2 * 0.525) + 0.1 = -0.105
    o6 = sigmoid(-0.105) = 0.4736
    
    error = ytarget - yactual = 1 - 0.4736 = 0.5264
    
    similarly we perform backward pass and update the weights and biases
    then we perform forward pass again and calculate the error, with the new weights and biases to find new ouptut
    """
    def __init__(self):
        self.weights = np.array([[0.2, -0.3], [0.4, 0.1], [-0.5, 0.2], [-0.3, -0.2]])
        self.biases = np.array([[-0.4, 0.2, 0.1]])
        self.learning_rate = 0.9
        self.y_target = 1

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward_pass(self, x):
        h = np.matmul(x, self.weights[:3,:]) + self.biases[0,:2]
        o = self.sigmoid(h)
        
        h6 = np.matmul(o.reshape(-1,1).T, self.weights[3:,:].T) + self.biases[0,2]  # Reshape 'o' into a 2D array here
        o6 = self.sigmoid(h6)
        
        return o6, o

    def backward(self, x, o6, o, y_target):
        """
        Perform the backward propagation
        Update the weights and biases based on the error between the predicted and actual output
        """
        # Perform a forward pass to get the output of the network
        # Calculate the error
        error = y_target - o6

        # Calculate the gradients for the output layer
        delta_o6 = error * o6 * (1 - o6)

        # Calculate the gradients for the hidden layer
        delta_o = o * (1 - o) * self.weights[3, :] * np.ones_like(o) * delta_o6

        # Reshape delta_o6 and o to match the dimensions of the weight matrices
        delta_o6 = delta_o6.reshape(1, -1)
        o = o.reshape(-1, 1)

        # Update the weights and biases
        # update weights one by one
        w14 = self.weights[0, 0] + self.learning_rate * delta_o6[0, 0] * o[0, 0]
        w15 = self.weights[0, 1] + self.learning_rate * delta_o6[0, 0] * o[1, 0]
        w24 = self.weights[1, 0] + self.learning_rate * delta_o6[0, 0] * o[0, 0]  # Use delta_o6[0, 0] here
        w25 = self.weights[1, 1] + self.learning_rate * delta_o6[0, 0] * o[1, 0]  # Use delta_o6[0, 0] here
        w34 = self.weights[2, 0] + self.learning_rate * delta_o6[0, 0] * o[0, 0]  # Use delta_o6[0, 0] here
        w35 = self.weights[2, 1] + self.learning_rate * delta_o6[0, 0] * o[1, 0]  # Use delta_o6[0, 0] here
        w46 = self.weights[3, 0] + self.learning_rate * delta_o6[0, 0] * o[0, 0]
        w56 = self.weights[3, 1] + self.learning_rate * delta_o6[0, 0] * o[1, 0]
        theta4 = self.biases[0, 0] + self.learning_rate * delta_o6[0, 0]
        theta5 = self.biases[0, 1] + self.learning_rate * delta_o6[0, 0]  # Use delta_o6[0, 0] here
        theta6 = self.biases[0, 2] + self.learning_rate * delta_o6[0, 0]

        # update weights
        self.weights = np.array([[w14, w15], [w24, w25], [w34, w35], [w46, w56]])
        self.biases = np.array([[theta4, theta5, theta6]])
        
        
    def train(self, x):
        o6, o = self.forward_pass(x)
        self.backward(x, o6,o, self.y_target)
        return self.forward_pass(x)[0]

ann = ANN() # create an instance of the ANN class
x = np.array([1, 0, 1]) # input
print("Output before training:", ann.forward_pass(x)[0]) # output before training
print("Output after training:", ann.train(x)) # output after training

    

Output before training: [[0.4738889]]
Output after training: [[0.50977364]]


In [6]:
import numpy as np


# Activation functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
def tanh(x):
    return np.tanh(x)
def relu(x):
    return np.maximum(0, x)
def leaky_relu(x):
    return np.maximum(0.01 * x, x)
def elu(x):
    return np.where(x > 0, x, 0.01 * (np.exp(x) - 1))

# Derivatives of activation functions
def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))
def tanh_derivative(x):
    return 1 - np.tanh(x) ** 2
def relu_derivative(x):
    return np.where(x > 0, 1, 0)
def leaky_relu_derivative(x):
    return np.where(x > 0, 1, 0.01)

# Loss functions
LOSSFUNC = {
    'mse': (lambda y, y_hat: np.mean((y - y_hat) ** 2), lambda y, y_hat: y_hat - y),
    'xentropy': (lambda y, y_hat: -np.mean(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat)), lambda y, y_hat: -y / y_hat + (1 - y) / (1 - y_hat))
}

# Activation functions
ACTFUNC = {
    'sigmoid': (sigmoid, sigmoid_derivative),
    'tanh': (tanh, tanh_derivative),
    'relu': (relu, relu_derivative),
    'leaky_relu': (leaky_relu, leaky_relu_derivative),
    'elu': (elu, lambda x: np.where(x > 0, 1, 0.01 * np.exp(x)))
}

def xavier_init(in_size, out_size):
    """Xavier initialization for weights
    in_size: number of neurons in the previous layer
    out_size: number of neurons in the current layer
    
    Xavier initialization is a way to initialize weights in a neural network using 
    a normal distribution whose variance is dependent on the number of neurons in the previous layer. 
    It is used to avoid the problem of vanishing and exploding gradients.
    """
    xavier_stddev = np.sqrt(2 / (in_size + out_size))
    return np.random.randn(out_size, in_size) * xavier_stddev

class pyann:
    """Artificial Neural Network class"""
    def __init__(self, layersizes, activations, lossfunc='xentropy'):
        self.layersizes = tuple(layersizes)
        self.activations = tuple(activations)
        self.lossfunc = lossfunc
        assert len(self.layersizes) - 1 == len(self.activations), \
            "NN number of layers and the activation function spec does not match"
        assert all(f in ACTFUNC for f in activations), "Unrecognized activation function used"
        assert all(isinstance(n, int) and n >= 1 for n in layersizes), \
            "Only positive integral number of perceptons is allowed in each layer"
        assert lossfunc in LOSSFUNC, "Unrecognized loss function used"
        self.params = {}
        self.grads = {}
        self.A = [None] * len(self.layersizes)
        self.Z = [None] * len(self.layersizes)
        self.dA = [None] * len(self.layersizes)
        self.dZ = [None] * len(self.layersizes)

    def init_nn(self, seed=42):
        """Initialize the weights and biases"""
        np.random.seed(seed)
        for l in range(1, len(self.layersizes)):
            self.params['W' + str(l)] = xavier_init(self.layersizes[l-1], self.layersizes[l])
            self.params['b' + str(l)] = np.zeros((self.layersizes[l], 1))

    def forward(self, X):
        """Feed forward the NN"""
        self.A[0] = X
        for l in range(1, len(self.layersizes)):
            self.Z[l] = np.dot(self.params['W' + str(l)], self.A[l-1]) + self.params['b' + str(l)]
            self.A[l] = ACTFUNC[self.activations[l-1]][0](self.Z[l])
        return self.A[-1]

    def backward(self, Y, Y_hat):
        """Back propagation"""
        self.dA[-1] = LOSSFUNC[self.lossfunc][1](Y, Y_hat)
        for l in reversed(range(1, len(self.layersizes))):
            self.dZ[l] = self.dA[l] * ACTFUNC[self.activations[l-1]][1](self.Z[l])
            self.grads['dW' + str(l)] = np.dot(self.dZ[l], self.A[l-1].T) / Y.shape[1]
            self.grads['db' + str(l)] = np.sum(self.dZ[l], axis=1, keepdims=True) / Y.shape[1]
            self.dA[l-1] = np.dot(self.params['W' + str(l)].T, self.dZ[l])

    def update(self, alpha):
        """Update weights and biases"""
        for l in range(1, len(self.layersizes)):
            self.params['W' + str(l)] -= alpha * self.grads['dW' + str(l)]
            self.params['b' + str(l)] -= alpha * self.grads['db' + str(l)]

    def fit(self, X, Y, epochs, alpha):
        """Train the NN"""
        self.init_nn()
        lossfunc = LOSSFUNC[self.lossfunc][0]
        for j in range(epochs):
            Y_hat = self.forward(X)
            self.backward(Y, Y_hat)
            self.update(alpha)
            loss = float(lossfunc(Y, Y_hat))
            print("Epoch {} - loss value {}".format(j+1, loss))
        return loss

ann = pyann([2, 3, 1], ['sigmoid', 'sigmoid'])
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])
ann.fit(X.T, Y.T, 10000, 0.1)
Y_hat = ann.forward(X.T)
print(Y_hat.T)  # [[0.0181899 ]
                #  [0.98102338]
                #  [0.98847942]
                #  [0.01749539]]
# a good result, the network has learned the output

Epoch 1 - loss value 0.7686184661272715
Epoch 2 - loss value 0.761516478916934
Epoch 3 - loss value 0.7550472948684377
Epoch 4 - loss value 0.749160085419907
Epoch 5 - loss value 0.7438071065638567
Epoch 6 - loss value 0.7389436943319111
Epoch 7 - loss value 0.7345282200241998
Epoch 8 - loss value 0.7305220131387492
Epoch 9 - loss value 0.7268892592069338
Epoch 10 - loss value 0.7235968789142793
Epoch 11 - loss value 0.7206143940369456
Epoch 12 - loss value 0.7179137848941552
Epoch 13 - loss value 0.7154693432341793
Epoch 14 - loss value 0.7132575237544208
Epoch 15 - loss value 0.7112567968145187
Epoch 16 - loss value 0.7094475043388957
Epoch 17 - loss value 0.707811720420895
Epoch 18 - loss value 0.7063331177306353
Epoch 19 - loss value 0.7049968404870642
Epoch 20 - loss value 0.7037893844744879
Epoch 21 - loss value 0.7026984843577541
Epoch 22 - loss value 0.7017130083709978
Epoch 23 - loss value 0.7008228603155522
Epoch 24 - loss value 0.7000188886968552
Epoch 25 - loss value 0.6992