In [1]:
import numpy as np

In [2]:
# Sigmoid activation function and its derivative
def sigmoid(x):
    """Numerically stable logistic sigmoid, ``1 / (1 + exp(-x))``, element-wise.

    The textbook formula evaluates ``exp(-x)``, which overflows to ``inf``
    (emitting a RuntimeWarning) once ``x`` drops below roughly -709. This
    version only ever exponentiates a non-positive number, so it can underflow
    to 0 but never overflow.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        float64 values in [0, 1] with the same shape as ``x``; a NumPy scalar
        when ``x`` is a scalar.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1] for every finite x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x)      x < 0: e^x / (1 + e^x)   (same value algebraically)
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and is a no-op otherwise.
    return result[()]

def sigmoid_derivative(x):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    For ``a = sigmoid(z)`` the slope with respect to ``z`` is ``a * (1 - a)``.
    ``x`` is therefore expected to be an activation that has already been
    passed through ``sigmoid``, not the raw pre-activation.

    Args:
        x: Scalar or NumPy array of sigmoid outputs.

    Returns:
        ``x * (1 - x)``, element-wise, with the same shape as ``x``.
    """
    complement = 1 - x
    return x * complement

In [3]:

# XOR truth table: each row of X is one (a, b) input pair and the matching
# row of Y holds the target a XOR b.
X = np.array([(0, 0), (0, 1), (1, 0), (1, 1)])

# Targets stored as a column vector, one row per sample in X.
Y = np.array([0, 1, 1, 0]).reshape(-1, 1)

In [4]:
# Network shape: 2 inputs -> 3 hidden units -> 3 hidden units -> 1 output.
# The global NumPy RNG is seeded so every fresh run starts from the same
# parameters. The draw order (W1, b1, W2, b2, W3, b3) matters because each
# call advances that shared generator.
np.random.seed(42)
input_size, hidden_size, output_size = 2, 3, 1

# Input -> hidden layer 1
W1 = np.random.standard_normal(size=(input_size, hidden_size))
b1 = np.random.standard_normal(size=(hidden_size,))

# Hidden layer 1 -> hidden layer 2
W2 = np.random.standard_normal(size=(hidden_size, hidden_size))
b2 = np.random.standard_normal(size=(hidden_size,))

# Hidden layer 2 -> output
W3 = np.random.standard_normal(size=(hidden_size, output_size))
b3 = np.random.standard_normal(size=(output_size,))


In [5]:
# Hyper-parameters for gradient descent
epochs = 10_000       # number of full passes over the dataset
learning_rate = 0.1   # step size applied to every gradient

In [6]:
# Training loop: full-batch gradient descent on the mean-squared-error loss.
for epoch in range(epochs):
    # ---- Forward pass: three affine layers, each followed by a sigmoid ----
    Z1 = X.dot(W1) + b1
    A1 = sigmoid(Z1)
    Z2 = A1.dot(W2) + b2
    A2 = sigmoid(Z2)
    Z3 = A2.dot(W3) + b3
    A3 = sigmoid(Z3)  # network prediction for every row of X

    # Mean squared error between the targets and the predictions
    loss = ((Y - A3) ** 2).mean()

    # ---- Backward pass ----
    # Each dA* already includes the sigmoid slope of its own layer, i.e. it is
    # the loss gradient with respect to that layer's pre-activation Z*.
    # All gradients are computed before any parameter is changed.
    dA3 = 2 * (A3 - Y) / Y.size * sigmoid_derivative(A3)
    dW3 = A2.T.dot(dA3)
    db3 = dA3.sum(axis=0)

    dA2 = dA3.dot(W3.T) * sigmoid_derivative(A2)
    dW2 = A1.T.dot(dA2)
    db2 = dA2.sum(axis=0)

    dA1 = dA2.dot(W2.T) * sigmoid_derivative(A1)
    dW1 = X.T.dot(dA1)
    db1 = dA1.sum(axis=0)

    # ---- Gradient-descent step ----
    # `-=` on an ndarray writes in place, so the W*/b* arrays themselves change.
    for param, grad in (
        (W3, dW3), (b3, db3),
        (W2, dW2), (b2, db2),
        (W1, dW1), (b1, db1),
    ):
        param -= learning_rate * grad

    # Report progress every 1000 epochs (loss is measured before this epoch's update)
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

Epoch 0, Loss: 0.3378
Epoch 1000, Loss: 0.2493
Epoch 2000, Loss: 0.2490
Epoch 3000, Loss: 0.2486
Epoch 4000, Loss: 0.2479
Epoch 5000, Loss: 0.2467
Epoch 6000, Loss: 0.2442
Epoch 7000, Loss: 0.2372
Epoch 8000, Loss: 0.2129
Epoch 9000, Loss: 0.1532


In [7]:

# Test the trained model
# The A3 left behind by the training loop was computed *before* the last
# weight update (and does not exist at all if the loop ran zero times), so
# run a fresh forward pass with the final parameters instead of reusing it.
hidden1 = sigmoid(np.dot(X, W1) + b1)
hidden2 = sigmoid(np.dot(hidden1, W2) + b2)
predictions = sigmoid(np.dot(hidden2, W3) + b3)

print("\nFinal Output:")
print(predictions)


Final Output:
[[0.1961353 ]
 [0.75946922]
 [0.73818119]
 [0.29240433]]
