In [1]:
import numpy as np

In [2]:
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])
Y = np.array([0, 1, 1, 0])

In [3]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [4]:
def sigmoid_derivative(a):
    return a * (1 - a)

In [5]:
W_hidden = np.random.randn(2, 2)
b_hidden = np.random.rand(2) # [0, 1]

W_output = np.random.randn(2)
b_output = np.random.rand(1)

In [6]:
alpha = 0.1
epochs = 10000

In [7]:
for epoch in range(epochs):
    total_loss = 0
    
    grad_W_output = np.zeros_like(W_output)
    grad_W_hidden = np.zeros_like(W_hidden)
    grad_b_output = 0.0
    grad_b_hidden = np.zeros_like(b_hidden)
    for x, y in zip(X, Y):
        # forward propogation
        z_hidden = np.dot(W_hidden, x) + b_hidden
        h = sigmoid(z_hidden)
        z_output = np.dot(W_output, h) + b_output
        y_hat = sigmoid(z_output)
        
        # error calculation
        loss = (y_hat - y) ** 2
        total_loss += loss
        
        # backpropogation
        delta_out = -2 * (y - y_hat) * (y_hat * (1 - y_hat))
        delta_h = delta_out * W_output * (h * (1 - h))
        
        grad_W_output += delta_out * h
        grad_b_output += delta_out
        grad_W_hidden += np.outer(delta_h, x)
        grad_b_hidden += delta_h
        
    grad_W_output /= len(X)
    grad_b_output /= len(X)
    grad_W_hidden /= len(X)
    grad_b_hidden /= len(X)
        
    W_output -= alpha * grad_W_output
    W_hidden -= alpha * grad_W_hidden
    b_output -= alpha * grad_b_output
    b_hidden -= alpha * grad_b_hidden
    
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Loss: {total_loss / len(X)}")
        
        
    
        

Epoch 0, Loss: [0.25120733]
Epoch 1000, Loss: [0.24988246]
Epoch 2000, Loss: [0.24932502]
Epoch 3000, Loss: [0.24835427]
Epoch 4000, Loss: [0.24600183]
Epoch 5000, Loss: [0.2395718]
Epoch 6000, Loss: [0.22237414]
Epoch 7000, Loss: [0.19318252]
Epoch 8000, Loss: [0.15876781]
Epoch 9000, Loss: [0.10148208]


In [8]:
print("Final output:")
for x in X:
    z_hidden = np.dot(W_hidden, x) + b_hidden
    h = sigmoid(z_hidden)
    z_output = np.dot(W_output, h) + b_output
    y_hat = sigmoid(z_output)
    y_pred = 1 if y_hat >= 0.5 else 0
    print(f"Input: {x}, Prediction: {y_pred}, Raw prediction: {y_hat}")

Final output:
Input: [0 0], Prediction: 0, Raw prediction: [0.18279313]
Input: [0 1], Prediction: 1, Raw prediction: [0.78604338]
Input: [1 0], Prediction: 1, Raw prediction: [0.78598237]
Input: [1 1], Prediction: 0, Raw prediction: [0.2629939]
