In [2]:
import numpy as np

# Sigmoid Activation Function: Maps any input to a value between 0 and 1.
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

    The naive formula evaluates ``exp(-x)``, which overflows (emitting a
    RuntimeWarning, or raising under ``np.seterr(over="raise")``) once *x*
    is a large negative number.  This version only ever exponentiates a
    non-positive value, so it cannot overflow.

    Args:
        x: A scalar or array-like of real numbers.

    Returns:
        Values strictly between 0 and 1 in exact arithmetic (they saturate
        to 0.0 / 1.0 in floating point for extreme inputs), with the same
        shape as *x*.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], whatever the sign of x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x))          (identical to the textbook form)
    # x <  0: exp(x) / (1 + exp(x))      (same value, overflow-free)
    numerator = np.where(x >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

# Derivative of the Sigmoid Activation Function, written in terms of the sigmoid's OUTPUT: pass s = sigmoid(z), not z. Used during backpropagation to calculate the gradient.
def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope expressed via its output.

    For ``s = sigmoid(z)`` the derivative with respect to ``z`` is
    ``s * (1 - s)``, so *x* here must already be a sigmoid activation,
    not the raw pre-activation value.

    Args:
        x: Sigmoid output(s), scalar or array.

    Returns:
        The element-wise product of *x* and its complement ``1 - x``.
    """
    complement = 1 - x
    return x * complement

# Neural Network Class Definition
class NeuralNetwork:
    """A fully connected network with one sigmoid hidden layer and a sigmoid output.

    Training is plain full-batch gradient descent on the squared error,
    implemented by hand in :meth:`forward` and :meth:`backward`.
    """

    def __init__(self, input_size, hidden_size, output_size, seed=None):
        """Create the weight matrices and bias rows for the given layer sizes.

        Args:
            input_size: Number of input features (columns of ``X``).
            hidden_size: Number of neurons in the hidden layer.
            output_size: Number of output neurons (columns of ``y``).
            seed: Optional seed for the random weight initialisation.

        When the topology is 2-3-1 and *seed* is ``None``, the hand-picked
        demo weights are used so that runs are exactly reproducible.  For
        any other topology (or when a seed is given) the weights are drawn
        uniformly from [-0.5, 0.5) with the requested shapes and the biases
        start at zero.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        if seed is None and (input_size, hidden_size, output_size) == (2, 3, 1):
            # Fixed, small starting values for the 2-3-1 demo network.
            self.weights_input_hidden = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])  # Input to hidden
            self.bias_hidden = np.array([[0.1, 0.1, 0.1]])  # Hidden layer bias

            self.weights_hidden_output = np.array([[0.2], [0.3], [0.4]])  # Hidden to output
            self.bias_output = np.array([[0.1]])  # Output layer bias
        else:
            # Shapes follow the requested sizes instead of being hard-coded.
            rng = np.random.default_rng(seed)
            self.weights_input_hidden = rng.uniform(-0.5, 0.5, size=(input_size, hidden_size))
            self.bias_hidden = np.zeros((1, hidden_size))

            self.weights_hidden_output = rng.uniform(-0.5, 0.5, size=(hidden_size, output_size))
            self.bias_output = np.zeros((1, output_size))

    def forward(self, X):
        """Run the forward pass and cache every intermediate activation.

        Args:
            X: Input batch; its column count must match the row count of
                ``weights_input_hidden``.

        Returns:
            The output-layer activations, one row per row of *X*.  The same
            array is stored as ``self.output_layer_output``; the other
            intermediates are stored for use by :meth:`backward`.
        """
        self.input_layer = X  # Store the input data

        # Input -> hidden: affine transform followed by the sigmoid.
        self.hidden_layer_input = np.dot(self.input_layer, self.weights_input_hidden) + self.bias_hidden
        self.hidden_layer_output = sigmoid(self.hidden_layer_input)

        # Hidden -> output: affine transform followed by the sigmoid.
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights_hidden_output) + self.bias_output
        self.output_layer_output = sigmoid(self.output_layer_input)

        return self.output_layer_output

    def backward(self, X, y, learning_rate):
        """Backpropagate the output error and update weights and biases in place.

        Relies on the activations cached by the most recent :meth:`forward`
        call, so ``forward(X)`` must have been run on the same *X* first.

        Args:
            X: The input batch that was fed to :meth:`forward`.
            y: Target values, same shape as the network output.
            learning_rate: Step size applied to every update.
        """
        # Error is (target - prediction), so the updates below are ADDED.
        error_output = y - self.output_layer_output

        # Delta of the output layer: error scaled by the sigmoid slope.
        output_layer_delta = error_output * sigmoid_derivative(self.output_layer_output)

        # Push the output delta back through the (not yet updated)
        # hidden-to-output weights to get the hidden layer's error.
        error_hidden = output_layer_delta.dot(self.weights_hidden_output.T)

        # Delta of the hidden layer.
        hidden_layer_delta = error_hidden * sigmoid_derivative(self.hidden_layer_output)

        # Hidden -> output parameters.
        self.weights_hidden_output += self.hidden_layer_output.T.dot(output_layer_delta) * learning_rate
        self.bias_output += np.sum(output_layer_delta, axis=0, keepdims=True) * learning_rate

        # Input -> hidden parameters.
        self.weights_input_hidden += X.T.dot(hidden_layer_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_layer_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate, log_every=1000):
        """Train with repeated full-batch forward/backward passes.

        Args:
            X: Input batch.
            y: Target values.
            epochs: Number of passes over the whole batch.
            learning_rate: Step size forwarded to :meth:`backward`.
            log_every: Print the loss every this many epochs (starting with
                epoch 0).  Pass 0 or ``None`` to disable printing.
        """
        for epoch in range(epochs):
            self.forward(X)

            self.backward(X, y, learning_rate)

            # The reported mean squared error uses the predictions made by
            # this epoch's forward pass, i.e. before the weight update.
            if log_every and epoch % log_every == 0:
                loss = np.mean(np.square(y - self.output_layer_output))
                print(f"Epoch {epoch} - Loss: {loss}")

# Main Program
if __name__ == "__main__":
    # NAND truth table: every combination of two binary inputs, with a
    # target of 0 only when both inputs are 1.
    X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
    y = np.array([1, 1, 1, 0]).reshape(-1, 1)

    # Topology: 2 inputs -> 3 hidden neurons -> 1 output.
    nn = NeuralNetwork(input_size=2, hidden_size=3, output_size=1)

    # 10,000 full-batch epochs at a learning rate of 0.05.
    nn.train(X, y, epochs=10000, learning_rate=0.05)

    # Show what the trained network predicts for each row of the table.
    print("\nPredictions after training:")
    predictions = nn.forward(X)
    print(predictions)


Epoch 0 - Loss: 0.20483235297159677
Epoch 1000 - Loss: 0.18815129873642003
Epoch 2000 - Loss: 0.17346993195161337
Epoch 3000 - Loss: 0.10385369556713733
Epoch 4000 - Loss: 0.03877419017251518
Epoch 5000 - Loss: 0.016186539509514257
Epoch 6000 - Loss: 0.008775192248239722
Epoch 7000 - Loss: 0.00564165579044863
Epoch 8000 - Loss: 0.00402437963632719
Epoch 9000 - Loss: 0.0030701713263273206

Predictions after training:
[[0.99882004]
 [0.95213494]
 [0.95335347]
 [0.07309679]]
