In [17]:
import numpy as np
X = np.array([[1.0], [2.0], [3.0], [4.0]]) 
y = np.array([[2.0], [4.0], [6.0], [8.0]])

np.random.seed(0)
W1 = np.random.randn(1, 3) * 0.1             # Shape: (1, 3)
b1 = np.zeros((1, 3))                        # Shape: (1, 3)
W2 = np.random.randn(3, 1) * 0.1             # Shape: (3, 1)
b2 = np.zeros((1, 1))                        # Shape: (1, 1)

In [18]:
def activation_relu(z):
    return np.maximum(0, z)

def derivative_activation_relu(z):
    return (z > 0).astype(float)

In [19]:

lr = 0.1

for epoch in range(100):

    # --- Forward ---
    z1 = X @ W1 + b1
    # ReLU: return z1 if z1 > 0 else 0
    A1 = activation_relu(z1)
    z2 = A1 @ W2 + b2
    y_hat = z2 
    
    # MSE Loss
    loss = np.mean((y - y_hat)**2)

    # --- Backward ---
    
    
    dl_dy_hat = (-2 * (y - y_hat)) / len(y)
    
    dl_dW2 = A1.T @ dl_dy_hat

   
    dl_db2 = np.sum(dl_dy_hat, axis=0, keepdims=True)

    dl_dA1 = dl_dy_hat @ W2.T

    
    d_relu = derivative_activation_relu(z1) 
    
   
    dl_dz1 = dl_dA1 * d_relu

    dl_dw1 = X.T @ dl_dz1

   
    dl_db1 = np.sum(dl_dz1, axis=0, keepdims=True)

    # --- Update ---
    W1 = W1 - (lr * dl_dw1)               # (1,3) - (1,3)
    b1 = b1 - (lr * dl_db1)               # (1,3) - (1,3)
    W2 = W2 - (lr * dl_dW2)               # (3,1) - (3,1)
    b2 = b2 - (lr * dl_db2)               # (1,1) - (1,1)
    


    


    
    

In [20]:
print("-" * 20)
test_val = np.array([[4.0]])
prediction = activation_relu(test_val @ W1 + b1) @ W2 + b2
print(f"Target for X=4 is 8.0")
print(f"Prediction: {prediction[0][0]:.4f}")

--------------------
Target for X=4 is 8.0
Prediction: 5.0000
