In [1]:
import numpy as np


In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

    

In [3]:
# Sample dataset (X = Inputs, Y = Labels)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])  # XOR dataset
Y = np.array([[0], [1], [1], [0]])  # Expected output


In [5]:
np.random.seed(1)  # For reproducibility

input_neurons = 2   # Number of input features
hidden_neurons = 3  # Number of hidden layer neurons
output_neurons = 1  # Number of output neurons

# Weights and biases for input -> hidden
W1 = np.random.randn(input_neurons, hidden_neurons)
b1 = np.zeros((1, hidden_neurons))

# Weights and biases for hidden -> output
W2 = np.random.randn(hidden_neurons, output_neurons)
b2 = np.zeros((1, output_neurons))

# Learning rate
lr = 0.1


In [6]:
# Forward pass (Input to Hidden)
Z1 = np.dot(X, W1) + b1
A1 = relu(Z1)  # Activation function

# Forward pass (Hidden to Output)
Z2 = np.dot(A1, W2) + b2
A2 = sigmoid(Z2)  # Output activation function

print("Predictions before training:\n", A2)


Predictions before training:
 [[0.5       ]
 [0.34101951]
 [0.94449498]
 [0.68329752]]


In [7]:
def loss(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

# Compute loss
initial_loss = loss(Y, A2)
print("Initial Loss:", initial_loss)


Initial Loss: 0.7439650431610728


In [8]:
# Compute Gradients
dL_dA2 = A2 - Y  # Gradient of loss w.r.t output
dA2_dZ2 = sigmoid_derivative(A2)  # Derivative of sigmoid
dZ2_dW2 = A1  # Input to output layer

# Gradients for output layer
dW2 = np.dot(dZ2_dW2.T, dL_dA2 * dA2_dZ2)
db2 = np.sum(dL_dA2 * dA2_dZ2, axis=0, keepdims=True)

# Gradients for hidden layer
dA1_dZ1 = relu_derivative(Z1)
dL_dZ1 = np.dot(dL_dA2 * dA2_dZ2, W2.T) * dA1_dZ1

dW1 = np.dot(X.T, dL_dZ1)
db1 = np.sum(dL_dZ1, axis=0, keepdims=True)

# Update weights and biases
W1 -= lr * dW1
b1 -= lr * db1
W2 -= lr * dW2
b2 -= lr * db2


In [10]:
epochs = 10000

for i in range(epochs):
    # Forward pass
    Z1 = np.dot(X, W1) + b1
    A1 = relu(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)
    
    # Compute Loss
    loss_value = loss(Y, A2)

    # Backpropagation
    dL_dA2 = A2 - Y  # Gradient of loss w.r.t output
    dA2_dZ2 = sigmoid_derivative(A2)  # Derivative of sigmoid
    dZ2_dW2 = A1  # Input to output layer

    # Gradients for output layer
    dW2 = np.dot(dZ2_dW2.T, dL_dA2 * dA2_dZ2)
    db2 = np.sum(dL_dA2 * dA2_dZ2, axis=0, keepdims=True)

    # Gradients for hidden layer
    dA1_dZ1 = relu_derivative(Z1)
    dL_dZ1 = np.dot(dL_dA2 * dA2_dZ2, W2.T) * dA1_dZ1

    dW1 = np.dot(X.T, dL_dZ1)
    db1 = np.sum(dL_dZ1, axis=0, keepdims=True)

    # Update weights and biases
    W1 -= lr * dW1
    b1 -= lr * db1
    W2 -= lr * dW2
    b2 -= lr * db2

    # Print loss every 1000 epochs
    if i % 1000 == 0:
        print(f"Epoch {i}, Loss: {loss_value:.5f}")


Epoch 0, Loss: 0.71895
Epoch 1000, Loss: 0.07094
Epoch 2000, Loss: 0.04137
Epoch 3000, Loss: 0.03136
Epoch 4000, Loss: 0.02606
Epoch 5000, Loss: 0.02267
Epoch 6000, Loss: 0.02029
Epoch 7000, Loss: 0.01849
Epoch 8000, Loss: 0.01709
Epoch 9000, Loss: 0.01595


In [11]:
# Forward pass after training
Z1 = np.dot(X, W1) + b1
A1 = relu(Z1)
Z2 = np.dot(A1, W2) + b2
A2 = sigmoid(Z2)

# Convert predictions to binary (0 or 1)
predictions = (A2 > 0.5).astype(int)

print("Final Predictions:\n", predictions)


Final Predictions:
 [[0]
 [1]
 [1]
 [0]]
