<a href="https://colab.research.google.com/github/EricSiq/DeepLearning/blob/main/ANN_using_XOR_gate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
# Import NumPy, which provides the array and matrix operations used by the network.
import numpy as np


In [12]:
# Step 1: Define the Neural Network class
class NeuralNetwork:
    """Feed-forward network with one hidden layer and sigmoid activations.

    The network is trained with full-batch gradient descent: every call to
    ``backpropagation`` uses the activations cached by the most recent
    ``feedforward`` call and updates all weights and biases in place.

    Attributes:
        weights_ih: ``(input_size, hidden_size)`` input-to-hidden weights.
        bias_h: ``(1, hidden_size)`` hidden-layer biases.
        weights_ho: ``(hidden_size, output_size)`` hidden-to-output weights.
        bias_o: ``(1, output_size)`` output-layer biases.
        hidden_layer_output: hidden activations of the last forward pass
            (``None`` until ``feedforward`` has been called).
        output: network output of the last forward pass (``None`` until
            ``feedforward`` has been called).
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1, seed=None):
        """Validate the layer sizes and create the initial weights and biases.

        Args:
            input_size: Number of input features (positive integer).
            hidden_size: Number of hidden neurons (positive integer).
            output_size: Number of output neurons (positive integer).
            learning_rate: Step size applied to every update, in ``(0, 1]``.
            seed: Optional seed for the weight initialization. When ``None``
                the weights are drawn from NumPy's global random stream, so
                ``np.random.seed`` keeps working as before. Otherwise the
                value is passed to ``np.random.default_rng``.

        Raises:
            TypeError: If a size is not an integer or the learning rate is
                not a real number (booleans are rejected for both).
            ValueError: If a size is not positive or the learning rate is
                outside ``(0, 1]``.
        """
        sizes = (input_size, hidden_size, output_size)
        # bool is a subclass of int, so it has to be excluded explicitly.
        if not all(
            isinstance(size, (int, np.integer)) and not isinstance(size, bool)
            for size in sizes
        ):
            raise TypeError("Input, hidden, and output sizes must be integers.")
        if any(size <= 0 for size in sizes):
            raise ValueError("Input, hidden, and output sizes must be positive.")
        if isinstance(learning_rate, bool) or not isinstance(
            learning_rate, (int, float, np.integer, np.floating)
        ):
            raise TypeError("Learning rate must be a number.")
        if not (0 < learning_rate <= 1):
            raise ValueError("Learning rate should be between 0 and 1.")

        self.input_size = int(input_size)
        self.hidden_size = int(hidden_size)
        self.output_size = int(output_size)
        self.learning_rate = float(learning_rate)

        # Step 2: Initialize weights and biases
        # Weights start from standard-normal samples so that the hidden
        # neurons begin at different points; biases start at zero.
        if seed is None:
            draw_normal = np.random.standard_normal
        else:
            draw_normal = np.random.default_rng(seed).standard_normal

        # Weights from input to hidden layer
        self.weights_ih = draw_normal((self.input_size, self.hidden_size))
        # Biases for hidden layer
        self.bias_h = np.zeros((1, self.hidden_size))

        # Weights from hidden to output layer
        self.weights_ho = draw_normal((self.hidden_size, self.output_size))
        # Biases for output layer
        self.bias_o = np.zeros((1, self.output_size))

        # Caches filled by feedforward() and consumed by backpropagation().
        self.hidden_layer_input = None
        self.hidden_layer_output = None
        self.output_layer_input = None
        self.output = None

    def _as_batch(self, inputs):
        """Return *inputs* as a float ``(samples, features)`` array of the expected width."""
        batch = np.asarray(inputs, dtype=float)
        if batch.ndim == 1:
            # A single sample is treated as a batch containing one row.
            batch = batch.reshape(1, -1)
        if batch.ndim != 2:
            raise ValueError(
                "Inputs must be a 1-D sample or a 2-D (samples, features) array, "
                f"got {batch.ndim} dimensions."
            )
        if batch.shape[1] != self.input_size:
            raise ValueError(f"Input shape mismatch. Expected {self.input_size} features, got {batch.shape[1]}.")
        return batch

    # Step 3: Define the activation functions (Sigmoid and its derivative)
    def sigmoid(self, x):
        """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

        The value is computed as ``exp(-log(1 + exp(-x)))`` through
        ``np.logaddexp`` so that large negative inputs do not overflow in
        ``exp`` the way the direct formula does.
        """
        return np.exp(-np.logaddexp(0, -np.asarray(x, dtype=float)))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output*.

        *x* must already be an activation ``s = sigmoid(z)``, not the
        pre-activation ``z``; the derivative with respect to ``z`` is then
        ``s * (1 - s)``.
        """
        return x * (1 - x)

    # Step 4: Implement the feedforward process
    def feedforward(self, inputs):
        """Propagate *inputs* through the hidden and output layers.

        Args:
            inputs: Array-like of shape ``(samples, input_size)``, or a single
                1-D sample of length ``input_size``.

        Returns:
            The ``(samples, output_size)`` array of output activations. The
            intermediate values are cached on the instance for
            ``backpropagation``.

        Raises:
            ValueError: If the inputs are not 1-D/2-D or have the wrong
                number of features.
        """
        batch = self._as_batch(inputs)

        # Calculate hidden layer output
        self.hidden_layer_input = np.dot(batch, self.weights_ih) + self.bias_h
        self.hidden_layer_output = self.sigmoid(self.hidden_layer_input)

        # Calculate final output
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights_ho) + self.bias_o
        self.output = self.sigmoid(self.output_layer_input)

        return self.output

    # Step 5: Implement the backpropagation process
    def backpropagation(self, inputs, targets):
        """Apply one gradient-descent update using the last forward pass.

        The error ``targets - output`` is scaled by the sigmoid derivative at
        each layer and propagated back through ``weights_ho``; all weights and
        biases are then updated in place, scaled by ``learning_rate``.

        Args:
            inputs: The same batch that was given to the preceding
                ``feedforward`` call.
            targets: Desired outputs, with exactly the shape of ``self.output``.

        Raises:
            RuntimeError: If ``feedforward`` has not been called yet.
            ValueError: If *targets* or *inputs* do not match the cached
                forward pass.
        """
        if getattr(self, "output", None) is None:
            raise RuntimeError("feedforward() must be called before backpropagation().")

        targets = np.asarray(targets, dtype=float)
        if targets.shape != self.output.shape:
            raise ValueError(f"Target shape mismatch. Expected {self.output.shape}, got {targets.shape}.")

        batch = self._as_batch(inputs)
        if batch.shape[0] != self.output.shape[0]:
            raise ValueError("Inputs do not match the batch used in the last feedforward pass.")

        # Calculate output layer error
        output_error = targets - self.output
        output_delta = output_error * self.sigmoid_derivative(self.output)

        # Calculate hidden layer error (uses the weights *before* they are updated)
        hidden_error = output_delta.dot(self.weights_ho.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_layer_output)

        # Update weights and biases for output layer
        self.weights_ho += self.hidden_layer_output.T.dot(output_delta) * self.learning_rate
        self.bias_o += np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate

        # Update weights and biases for hidden layer
        self.weights_ih += batch.T.dot(hidden_delta) * self.learning_rate
        self.bias_h += np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate

    # Step 6: Define the training loop
    def train(self, inputs, targets, epochs, log_every=1000):
        """Run *epochs* full-batch forward/backward passes over the data.

        Args:
            inputs: NumPy array of training samples, one row per sample.
            targets: NumPy array of desired outputs, one row per sample.
            epochs: Number of passes to run (positive integer).
            log_every: Print the mean absolute error every this many epochs.
                ``None`` or ``0`` disables the progress output.

        Raises:
            ValueError: If *epochs* is not a positive integer, *log_every* is
                invalid, or the row counts of *inputs* and *targets* differ.
            TypeError: If *inputs* or *targets* is not a NumPy array.
        """
        if isinstance(epochs, bool) or not isinstance(epochs, (int, np.integer)) or epochs <= 0:
            raise ValueError("Epochs must be a positive integer.")
        if log_every is not None and (
            isinstance(log_every, bool)
            or not isinstance(log_every, (int, np.integer))
            or log_every < 0
        ):
            raise ValueError("log_every must be a non-negative integer or None.")
        if not isinstance(inputs, np.ndarray) or not isinstance(targets, np.ndarray):
            raise TypeError("Inputs and targets must be numpy arrays.")
        if inputs.shape[0] != targets.shape[0]:
            raise ValueError("Inputs and targets must have the same number of rows.")

        for epoch in range(epochs):
            # One forward and one backward pass over the whole batch
            self.feedforward(inputs)
            self.backpropagation(inputs, targets)

            # Report progress; the error is measured on the output computed
            # before this epoch's weight update.
            if log_every and (epoch + 1) % log_every == 0:
                error = np.mean(np.abs(targets - self.output))
                print(f"Epoch {epoch + 1}/{epochs}, Error: {error:.4f}")

    # Step 7: Define a method for making predictions
    def predict(self, inputs):
        """Return the network output for *inputs* rounded with ``np.round``.

        For sigmoid outputs in ``[0, 1]`` this yields the binary labels
        ``0.0`` or ``1.0`` used by the XOR example.
        """
        return np.round(self.feedforward(inputs))

In [13]:

# Step 8: Prepare the XOR dataset
"""
This cell defines the training data for the XOR problem.
X contains the four possible binary inputs [0, 0], [0, 1], [1, 0], and [1, 1].
y contains the corresponding XOR outputs [0], [1], [1], and [0].
"""
if __name__ == '__main__':
    # XOR inputs: every combination of two binary values, one row per sample.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1]
    ])

    # XOR outputs, one row per sample, in the same order as X.
    y = np.array([[0], [1], [1], [0]])

    # Step 9: Initialize and train the neural network
    # Network with 2 inputs, 4 hidden neurons, and 1 output, trained for
    # 10,000 epochs on the XOR data. TypeError and ValueError raised during
    # setup or training are caught and reported instead of producing a
    # traceback.
    try:
        ann = NeuralNetwork(input_size=2, hidden_size=4, output_size=1, learning_rate=0.5)
        epochs = 10000
        print(f"Training the ANN for XOR gate with {epochs} epochs...")
        ann.train(X, y, epochs)
        print("Training complete.")

        # Step 10: Test the trained network
        # Each XOR input is fed to predict() and compared with its expected
        # output. The check is an explicit raise rather than an `assert`
        # statement: asserts are removed under `python -O`, which would let
        # the success message below print even for wrong predictions.
        print("\nTesting the trained network:")
        for sample, actual_output in zip(X, y):
            # predict() expects a 2-D (samples, features) array.
            input_data = sample.reshape(1, -1)
            predicted_output = ann.predict(input_data)
            print(f"Input: {input_data}, Predicted: {predicted_output[0][0]}, Actual: {actual_output[0]}")
            if predicted_output[0][0] != actual_output[0]:
                raise AssertionError("Prediction failed for a test case.")

        print("\nAll predictions are correct! The ANN successfully learned the XOR gate.")

    except (TypeError, ValueError) as e:
        print(f"An error occurred during network setup or training: {e}")

Training the ANN for XOR gate with 10000 epochs...
Epoch 1000/10000, Error: 0.0913
Epoch 2000/10000, Error: 0.0473
Epoch 3000/10000, Error: 0.0351
Epoch 4000/10000, Error: 0.0290
Epoch 5000/10000, Error: 0.0251
Epoch 6000/10000, Error: 0.0225
Epoch 7000/10000, Error: 0.0205
Epoch 8000/10000, Error: 0.0189
Epoch 9000/10000, Error: 0.0177
Epoch 10000/10000, Error: 0.0166
Training complete.

Testing the trained network:
Input: [[0 0]], Predicted: 0.0, Actual: 0
Input: [[0 1]], Predicted: 1.0, Actual: 1
Input: [[1 0]], Predicted: 1.0, Actual: 1
Input: [[1 1]], Predicted: 0.0, Actual: 0

All predictions are correct! The ANN successfully learned the XOR gate.
