In [None]:
import numpy as np

# Activation function: Sigmoid and its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    ``x`` may be a scalar or a NumPy array; the arithmetic broadcasts, so an
    array input produces an array of the same shape.

    For strongly negative ``x`` the intermediate ``exp(-x)`` overflows to
    ``inf``. The quotient ``1 / (1 + inf)`` is still exactly ``0.0``, the
    correct limit, so the overflow is harmless. It is silenced locally so
    callers do not get a spurious ``RuntimeWarning`` (or a
    ``FloatingPointError`` when running under ``np.errstate(over="raise")``).
    Returned values are unchanged compared with the plain formula.
    """
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-x))

def sigmoid_derivative(s):
    """Return the slope of the sigmoid, given the sigmoid's *output* ``s``.

    Relies on the identity sigma'(x) = sigma(x) * (1 - sigma(x)), so the
    caller passes the already-activated value, not the pre-activation.
    Works element-wise on scalars and NumPy arrays alike.
    """
    complement = 1 - s
    return s * complement

# XOR truth table used as the whole training set.
# Each row of X is one sample with 2 input features; the matching row of y
# holds the single target value for that sample (1 when exactly one input is 1).
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

y = np.array([[0], [1], [1], [0]])

# Layer sizes and optimisation settings
input_dim = 2          # features per sample
hidden_dim = 2         # hidden units (minimum 2 for XOR)
output_dim = 1         # single output unit
learning_rate = 0.1    # step size applied to every parameter update
epochs = 10_000        # number of passes through the training loop

# Seed NumPy's global RNG so every run starts from the same parameters, then
# draw each weight and bias uniformly from [-1, 1).
# The draw order (W1, b1, W2, b2) determines the values, so keep it fixed.
np.random.seed(42)
W1 = np.random.uniform(low=-1, high=1, size=(input_dim, hidden_dim))   # input  -> hidden weights
b1 = np.random.uniform(low=-1, high=1, size=(1, hidden_dim))           # hidden biases (row vector)
W2 = np.random.uniform(low=-1, high=1, size=(hidden_dim, output_dim))  # hidden -> output weights
b2 = np.random.uniform(low=-1, high=1, size=(1, output_dim))           # output bias (row vector)

# Training loop: every epoch runs one forward pass over all samples in X,
# back-propagates the residual, and nudges W1/b1/W2/b2 in place.
for epoch in range(epochs):
    # ---- Forward pass ----
    # Hidden layer: affine transform of the inputs, then sigmoid.
    hidden_input = X.dot(W1) + b1
    hidden_output = sigmoid(hidden_input)

    # Output layer: affine transform of the hidden activations, then sigmoid.
    final_input = hidden_output.dot(W2) + b2
    final_output = sigmoid(final_input)

    # Residual at the output (target minus prediction).
    error = y - final_output

    # Every 1000th epoch, report the mean squared residual of this forward
    # pass, i.e. the loss measured before this epoch's parameter update.
    if epoch % 1000 == 0:
        loss = np.square(error).mean()
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

    # ---- Backward pass ----
    # Output delta: residual scaled by the sigmoid slope at the output.
    delta_output = error * sigmoid_derivative(final_output)

    # Push the output delta back through W2 (still the pre-update weights)
    # and scale by the sigmoid slope at the hidden layer.
    error_hidden = np.dot(delta_output, W2.T)
    delta_hidden = error_hidden * sigmoid_derivative(hidden_output)

    # ---- Parameter updates ----
    # Deltas are built from (target - prediction), so the steps are added.
    # Bias steps sum the deltas over the sample axis and keep the row shape.
    W2 += learning_rate * np.dot(hidden_output.T, delta_output)
    b2 += learning_rate * delta_output.sum(axis=0, keepdims=True)
    W1 += learning_rate * np.dot(X.T, delta_hidden)
    b1 += learning_rate * delta_hidden.sum(axis=0, keepdims=True)

# Final predictions
# Run one more forward pass with the trained parameters and report, for each
# sample, the thresholded network output next to the expected target.
print("\nFinal predictions after training:")
final_hidden = sigmoid(np.dot(X, W1) + b1)
final_pred = sigmoid(np.dot(final_hidden, W2) + b2)
# Walk inputs, network outputs and targets in lockstep instead of indexing
# each array by position; column 0 is the single output/target column.
for sample, activation, target in zip(X, final_pred[:, 0], y[:, 0]):
    # Convert prediction to binary (threshold 0.5)
    predicted = 1 if activation >= 0.5 else 0
    print(f"Input: {sample}, Predicted: {predicted}, True: {target}")


Epoch 0, Loss: 0.2545
Epoch 1000, Loss: 0.2500
Epoch 2000, Loss: 0.2493
Epoch 3000, Loss: 0.2422
Epoch 4000, Loss: 0.1739
Epoch 5000, Loss: 0.0480
Epoch 6000, Loss: 0.0165
Epoch 7000, Loss: 0.0089
Epoch 8000, Loss: 0.0059
Epoch 9000, Loss: 0.0043

Final predictions after training:
Input: [0 0], Predicted: 0, True: 0
Input: [0 1], Predicted: 1, True: 1
Input: [1 0], Predicted: 1, True: 1
Input: [1 1], Predicted: 0, True: 0
