In [2]:
import numpy as np

# Sigmoid Activation Function: Maps any input to a value between 0 and 1.
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The textbook formula calls ``np.exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative inputs. This version only ever
    exponentiates a non-positive number, so it cannot overflow:

    * for ``x >= 0``: ``1 / (1 + exp(-x))``
    * for ``x <  0``: ``exp(x) / (1 + exp(x))``

    Args:
        x: Real scalar or array-like.

    Returns:
        A NumPy float scalar for scalar input, otherwise an array with the
        same shape as *x*; every value lies between 0 and 1.
    """
    x = np.asarray(x)
    # Integer/boolean input is promoted to float; floating dtypes are kept.
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)

    decay = np.exp(-np.abs(x))  # exp of a value <= 0, always within [0, 1]
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and is a no-op
    # view for arrays of higher rank.
    return result[()]

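# Derivative of the Sigmoid Activation Function: used during backpropagation to calculate the gradient.
# Note: the argument must already be a sigmoid output s = sigmoid(z); the function returns s * (1 - s).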
def sigmoid_derivative(x):
    """Return the sigmoid's slope expressed through its own output.

    *x* is expected to be a value already produced by the sigmoid (an
    activation), not the raw pre-activation: for ``s = sigmoid(z)`` the
    derivative with respect to ``z`` is ``s * (1 - s)``, which is exactly
    what is computed here.
    """
    complement = 1 - x
    return x * complement

# Neural Network Class Definition
class NeuralNetwork:
    """Fully connected network with one sigmoid hidden layer and a sigmoid output.

    Training is full-batch gradient descent: ``forward`` caches every layer's
    activations on the instance and ``backward`` uses them to update the
    weights and biases in place.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the network and initialise its parameters.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden neurons.
            output_size: Number of output neurons.
            seed: Optional seed for the random initialisation used when the
                requested shape is not 2-3-1. Ignored for the 2-3-1 shape.

        For the 2-3-1 shape the hand-picked starting weights are used, so
        results for that configuration are reproducible. For any other shape
        the parameters are sized from the arguments (weights uniform in
        [-1, 1), biases zero) instead of silently building a 2-3-1 network.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        if (input_size, hidden_size, output_size) == (2, 3, 1):
            # Custom weights and biases initialization. Fresh arrays are built
            # per instance because training updates them in place.
            self.weights_input_hidden = np.array([[0.2, -0.4, 0.6], [-0.1, 0.5, -0.3]])  # Input to hidden
            self.bias_hidden = np.array([[0.2, -0.2, 0.2]])  # Hidden layer bias
            self.weights_hidden_output = np.array([[0.7], [-0.5], [0.4]])  # Hidden to output
            self.bias_output = np.array([[0.1]])  # Output layer bias
        else:
            rng = np.random.default_rng(seed)
            self.weights_input_hidden = rng.uniform(-1.0, 1.0, size=(input_size, hidden_size))
            self.bias_hidden = np.zeros((1, hidden_size))
            self.weights_hidden_output = rng.uniform(-1.0, 1.0, size=(hidden_size, output_size))
            self.bias_output = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and return the output-layer activations.

        Args:
            X: Input batch; its last dimension must equal ``input_size`` so it
                can be multiplied with ``weights_input_hidden``.

        Returns:
            The output activations, one row per sample. All intermediate
            values are stored on ``self`` for use by ``backward``.
        """
        self.input_layer = X
        self.hidden_layer_input = np.dot(self.input_layer, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = sigmoid(self.hidden_layer_input)
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.output_layer_output = sigmoid(self.output_layer_input)
        return self.output_layer_output

    def backward(self, X, y, learning_rate):
        """Backpropagate the error of the last forward pass and update parameters.

        Must be called after ``forward`` on the same ``X``, because it reads
        the cached ``hidden_layer_output`` and ``output_layer_output``.

        Args:
            X: The input batch that was passed to ``forward``.
            y: Target values, same shape as the network output.
            learning_rate: Step size applied to every update.
        """
        # Deltas are "error times activation slope" for each layer.
        error_output = y - self.output_layer_output
        output_layer_delta = error_output * sigmoid_derivative(self.output_layer_output)
        # The hidden error must use the hidden-to-output weights *before*
        # they are updated below.
        error_hidden = output_layer_delta.dot(self.weights_hidden_output.T)
        hidden_layer_delta = error_hidden * sigmoid_derivative(self.hidden_layer_output)

        # error is (target - prediction), so adding the products moves the
        # parameters in the direction that reduces the squared error.
        self.weights_hidden_output += self.hidden_layer_output.T.dot(output_layer_delta) * learning_rate
        self.bias_output += np.sum(output_layer_delta, axis=0, keepdims=True) * learning_rate
        self.weights_input_hidden += X.T.dot(hidden_layer_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_layer_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate):
        """Train on the full batch ``(X, y)`` for ``epochs`` iterations.

        Prints the mean squared error every 1000 epochs. The reported loss is
        computed from the activations of that epoch's forward pass, i.e.
        before the epoch's parameter update takes effect.
        """
        for epoch in range(epochs):
            self.forward(X)
            self.backward(X, y, learning_rate)
            if epoch % 1000 == 0:
                loss = np.mean(np.square(y - self.output_layer_output))
                print(f"Epoch {epoch} - Loss: {loss}")

# Main Program
if __name__ == "__main__":
    # XOR truth table: one row per input pair, with the matching target in y.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([[0], [1], [1], [0]])

    # Two inputs, three hidden neurons, one output.
    nn = NeuralNetwork(input_size=2, hidden_size=3, output_size=1)

    # 15,000 full-batch epochs at a learning rate of 0.01.
    # NOTE(review): in the recorded run the loss plateaus near 0.25 and the
    # predictions stay around 0.5, so these settings do not learn XOR; a
    # larger learning rate or more epochs is probably needed - confirm by
    # re-running before changing the values.
    nn.train(X, y, epochs=15000, learning_rate=0.01)

    # Show what the trained network outputs for each of the four inputs.
    print("\nPredictions after training:")
    predictions = nn.forward(X)
    print(predictions)


Epoch 0 - Loss: 0.2656253844377818
Epoch 1000 - Loss: 0.25052722352922147
Epoch 2000 - Loss: 0.2504432418426345
Epoch 3000 - Loss: 0.2503755345024164
Epoch 4000 - Loss: 0.2503195105633955
Epoch 5000 - Loss: 0.2502726087280806
Epoch 6000 - Loss: 0.25023291486231
Epoch 7000 - Loss: 0.25019897699868454
Epoch 8000 - Loss: 0.25016967854138145
Epoch 9000 - Loss: 0.250144149816591
Epoch 10000 - Loss: 0.2501217053593956
Epoch 11000 - Loss: 0.2501017987750972
Epoch 12000 - Loss: 0.25008398979392515
Epoch 13000 - Loss: 0.25006791991038824
Epoch 14000 - Loss: 0.25005329414835653

Predictions after training:
[[0.50140679]
 [0.48687625]
 [0.51232741]
 [0.49762452]]
