# In [None]:
import numpy as np

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which always lies in [0, 1],
    so large-magnitude inputs cannot overflow ``exp`` the way the direct
    formula does for very negative ``x``.

    Args:
        x: Real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an array shaped like ``x``,
        with every value in [0, 1].
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
    # The two forms are algebraically equal, and both only ever see exp(-|x|).
    numerator = np.where(x >= 0, np.ones_like(decay), decay)
    return numerator / (1 + decay)

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid slope written in terms of its output.

    ``x`` is expected to be a value already produced by the sigmoid (an
    activation), not the pre-activation input; the callers in this file pass
    activations.
    """
    complement = 1 - x
    return x * complement

def train(X, Y, epochs, learning_rate, hidden_size=2):
    """Train a one-hidden-layer sigmoid network with full-batch gradient descent.

    The network computes ``sigmoid(sigmoid(X . w1 + b1) . w2 + b2)``. Every
    epoch runs one forward pass over all samples followed by one parameter
    update; the updates are summed (not averaged) over the samples.

    Layer sizes are taken from the data: the input width is ``X.shape[1]``
    and the output width is ``Y.shape[1]``. Parameters are initialised with
    ``np.random.rand`` (uniform on [0, 1)) from NumPy's global random state,
    so call ``np.random.seed`` beforehand for reproducible runs.

    Args:
        X: Inputs of shape (n_samples, n_features).
        Y: Targets of shape (n_samples, n_outputs). A 1-D (or scalar) target
            is treated as a single output column.
        epochs: Number of full-batch update steps.
        learning_rate: Step size applied to every update.
        hidden_size: Number of hidden units. Defaults to 2.

    Returns:
        Tuple ``(w1, b1, w2, b2)`` with shapes (n_features, hidden_size),
        (1, hidden_size), (hidden_size, n_outputs) and (1, n_outputs).

    Raises:
        ValueError: If ``X`` is not two-dimensional.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    if X.ndim != 2:
        raise ValueError(
            f"X must have shape (n_samples, n_features); got {X.shape}"
        )
    if Y.ndim < 2:
        # A flat target vector would broadcast against the (n_samples, 1)
        # output into an (n_samples, n_samples) error matrix.
        Y = Y.reshape(-1, 1)

    n_features = X.shape[1]
    n_outputs = Y.shape[1]

    # Draw order (w1, b1, w2, b2) is kept fixed so seeded runs stay stable.
    w1 = np.random.rand(n_features, hidden_size)
    b1 = np.random.rand(1, hidden_size)
    w2 = np.random.rand(hidden_size, n_outputs)
    b2 = np.random.rand(1, n_outputs)

    for _ in range(epochs):
        # Forward pass: the bias is added to the weighted sum first, and the
        # sigmoid is applied to the result.
        a1 = sigmoid(np.dot(X, w1) + b1)
        a2 = sigmoid(np.dot(a1, w2) + b2)

        # error = target - output, so the deltas below already point in the
        # descent direction for 0.5 * sum(error ** 2); updates are added.
        error = Y - a2

        # Backpropagation. sigmoid_derivative takes activations, not
        # pre-activations. Both deltas are computed before any weight changes
        # so the hidden delta uses the pre-update w2.
        d_a2 = error * sigmoid_derivative(a2)
        d_a1 = np.dot(d_a2, w2.T) * sigmoid_derivative(a1)

        # Parameter updates (bias gradients are summed over the samples).
        w2 += learning_rate * np.dot(a1.T, d_a2)
        b2 += learning_rate * np.sum(d_a2, axis=0, keepdims=True)
        w1 += learning_rate * np.dot(X.T, d_a1)
        b1 += learning_rate * np.sum(d_a1, axis=0, keepdims=True)

    return w1, b1, w2, b2

def predict(X, w1, b1, w2, b2):
    """Run the network's forward pass and round the outputs with ``np.round``.

    Args:
        X: Input sample(s); its last axis is multiplied against ``w1``.
        w1: Hidden-layer weights.
        b1: Hidden-layer bias, added before the hidden sigmoid.
        w2: Output-layer weights.
        b2: Output-layer bias, added before the output sigmoid.

    Returns:
        The output activations rounded by ``np.round``.
    """
    # Hidden layer: weighted sum plus bias, then sigmoid.
    hidden = sigmoid(np.dot(X, w1) + b1)
    # Output layer: same pattern applied to the hidden activations.
    output = sigmoid(np.dot(hidden, w2) + b2)
    return np.round(output)

# XOR truth table: one input pair per row of X, its label in the same row of Y.
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
Y = np.array([[0], [1], [1], [0]])

# Hyperparameters, then fit the network on the full truth table.
epochs, learning_rate = 10000, 0.1
w1, b1, w2, b2 = train(X, Y, epochs=epochs, learning_rate=learning_rate)

# Report the rounded network output next to the expected label for each row.
for x, y in zip(X, Y):
    predicted = predict(x, w1, b1, w2, b2)[0][0]
    print(f"Input: {x}, Predicted: {predicted}, Actual: {y[0]}")