Let's demonstrate how backpropagation is used in a neural network to solve the XOR problem.

## Define a neural network
- input layer with 2 inputs
- hidden layer with 4 neurons
- output layer with 1 output neuron
- Sigmoid function as the activation function

In [None]:
import numpy as np

class NeuralNetwork:
    """Feed-forward network with one hidden layer and sigmoid activations.

    Both layers are fully connected. Training is full-batch gradient descent
    on the squared error between the targets and the network output, with the
    gradients obtained by backpropagation.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network with random weights and zero biases.

        Args:
            input_size: Number of input features.
            hidden_size: Number of neurons in the hidden layer.
            output_size: Number of output neurons.
            seed: Optional seed for reproducible weight initialisation. When
                ``None`` (the default) the weights are drawn from NumPy's
                global random state.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # The global ``np.random`` module and a ``RandomState`` instance both
        # expose ``rand``, so one code path serves the seeded and unseeded case.
        rng = np.random if seed is None else np.random.RandomState(seed)

        # Weights are uniform in [0, 1); biases start at zero.
        self.weights_input_hidden = rng.rand(self.input_size, self.hidden_size)
        self.weights_hidden_output = rng.rand(self.hidden_size, self.output_size)

        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.bias_output = np.zeros((1, self.output_size))

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        Written as ``exp(-log(1 + exp(-x)))`` so that large negative inputs do
        not overflow inside ``exp``.
        """
        return np.exp(-np.logaddexp(0, -x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``.

        ``x`` must already be an activation ``sigmoid(z)``, not the
        pre-activation ``z``; the derivative is then ``x * (1 - x)``.
        """
        return x * (1 - x)

    def _as_target_matrix(self, y):
        """Return ``y`` as an array, turning a 1-D vector into a column.

        With a single output neuron the network output has shape ``(n, 1)``;
        a 1-D target of shape ``(n,)`` would broadcast against it into an
        ``(n, n)`` matrix, so it is reshaped to ``(n, 1)`` first.
        """
        y = np.asarray(y)
        if y.ndim == 1 and self.output_size == 1:
            y = y.reshape(-1, 1)
        return y

    def feedforward(self, X):
        """Run a forward pass and return the output-layer activations.

        The pre-activations and activations of both layers are stored on the
        instance because ``backward`` reads them.
        """
        self.hidden_layer_activation = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = self.sigmoid(self.hidden_layer_activation)

        self.output_layer_activation = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.output_layer_output = self.sigmoid(self.output_layer_activation)

        return self.output_layer_output

    def backward(self, X, y, learning_rate=0.01):
        """Backpropagate the error and update weights and biases in place.

        Must be called after ``feedforward(X)`` for the same ``X``, since it
        uses the activations stored by that call.

        Args:
            X: Input samples, one row per sample.
            y: Targets, one row per sample (a 1-D vector is accepted when the
                network has a single output neuron).
            learning_rate: Step size applied to every update.
        """
        X = np.asarray(X)
        y = self._as_target_matrix(y)

        # Error and delta at the output layer.
        output_error = y - self.output_layer_output
        output_delta = output_error * self.sigmoid_derivative(self.output_layer_output)

        # Propagate the delta back through the (not yet updated) output weights.
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_layer_output)

        # The error is defined as (target - output), so adding the scaled
        # deltas moves the parameters in the direction that reduces it.
        self.weights_hidden_output += np.dot(self.hidden_layer_output.T, output_delta) * learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate

        self.weights_input_hidden += np.dot(X.T, hidden_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate, log_every=4000):
        """Train on the full batch ``(X, y)`` for ``epochs`` iterations.

        Args:
            X: Input samples, one row per sample.
            y: Targets, one row per sample (a 1-D vector is accepted when the
                network has a single output neuron).
            epochs: Number of forward/backward passes to run.
            learning_rate: Step size passed to ``backward``.
            log_every: Print the mean squared error every this many epochs;
                ``0`` or ``None`` disables printing.
        """
        X = np.asarray(X)
        y = self._as_target_matrix(y)

        for epoch in range(epochs):
            self.feedforward(X)
            self.backward(X, y, learning_rate)

            if log_every and epoch % log_every == 0:
                # Uses the output of the forward pass made before this
                # epoch's weight update.
                loss = np.mean(np.square(y - self.output_layer_output))
                print(f'Epoch {epoch}, Loss: {loss}')


# XOR truth table: each row of X is an input pair, each row of y its target.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Build a 2-4-1 network and fit it to the four XOR samples.
nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)
nn.train(X, y, epochs=10000, learning_rate=0.1)

# Run the trained network on the same inputs and show what it predicts.
output = nn.feedforward(X)
print("Predictions after training:")
print(output)


Epoch 0, Loss: 0.31814941749963055
Epoch 4000, Loss: 0.040109507378415205
Epoch 8000, Loss: 0.004269012743401914
Predictions after training:
[[0.05849406]
 [0.95075117]
 [0.95134241]
 [0.05067486]]
