In [1]:
import numpy as np

In [None]:
# Fix the global NumPy seed so the random weight initialisation is repeatable.
np.random.seed(42)

# Training data for the target function y = 2x:
# x holds the integers 0..9 as a (10, 1) column, y holds their doubles.
x = np.arange(10).reshape(-1, 1)
y = x * 2

# Initialize params: 1 hidden layer (as wide as the input, i.e. 1 neuron here), no bias
def init_params(input_shape, output_shape, hidden_shape=None):
  """Create small random weights for a bias-free net with one hidden layer.

  Args:
    input_shape: Number of input features (columns of the input matrix).
    output_shape: Number of output values per sample.
    hidden_shape: Width of the hidden layer. Defaults to ``input_shape``,
      which reproduces the behaviour of the two-argument call.

  Returns:
    ``(W1, W2)`` with shapes ``(input_shape, hidden_shape)`` and
    ``(hidden_shape, output_shape)``. Entries are drawn uniformly from
    ``[0, 0.01)`` using NumPy's global random state.
  """
  if hidden_shape is None:
    hidden_shape = input_shape

  # W1 is drawn before W2 so that a seeded run keeps producing the same weights.
  W1 = np.random.rand(input_shape, hidden_shape) * 0.01
  W2 = np.random.rand(hidden_shape, output_shape) * 0.01
  return W1, W2

# Forward propagation
def forward(x, W1, W2):
  """Run the two-layer linear network (no bias, no activation) on ``x``.

  Returns:
    A pair ``(hidden, output)`` where ``hidden`` is ``x @ W1`` and
    ``output`` is ``hidden @ W2``.
  """
  h = x @ W1
  return h, h @ W2

# MSE loss
def MSE(y_true, y_pred):
  """Return the mean of the squared element-wise differences of the inputs."""
  residual = y_true - y_pred
  return np.mean(residual * residual)

# Backward propagation
def backward(x, y, W1, W2, H, O, lr=0.01):
  """Apply one clipped gradient-descent step for the mean-squared-error loss.

  The network is ``H = x @ W1`` followed by ``O = H @ W2`` (no bias, no
  activation), and the loss is the mean of ``(O - y) ** 2`` over all elements.

  Args:
    x: Input matrix used for the forward pass.
    y: Target matrix, same shape as ``O``.
    W1: Input-to-hidden weights; updated in place.
    W2: Hidden-to-output weights; updated in place.
    H: Hidden values from the forward pass.
    O: Output values from the forward pass.
    lr: Learning rate.

  Returns:
    ``(W1, W2)``: the same arrays that were passed in, after the update.
  """
  # Gradient of mean((O - y) ** 2) w.r.t. O. The mean runs over every element,
  # so normalise by O.size; this equals O.shape[0] when O has a single column.
  dL_dO = 2 * (O - y) / O.size

  # O = H @ W2  =>  dL/dW2 = H^T @ dL/dO
  dL_dW2 = H.T @ dL_dO

  # H = x @ W1  =>  dL/dW1 = x^T @ (dL/dO @ W2^T).
  # W2 is read here, before it is modified further down.
  dL_dW1 = x.T @ (dL_dO @ W2.T)

  # Clip the gradients (not the weights) element-wise to [-1, 1] to guard
  # against overflow, since there is no activation function to bound values.
  np.clip(dL_dW1, -1, 1, out=dL_dW1)
  np.clip(dL_dW2, -1, 1, out=dL_dW2)

  # In-place updates: the caller's arrays are modified and also returned.
  W2 -= lr * dL_dW2
  W1 -= lr * dL_dW1

  return W1, W2

# Training loop
epochs = 1000
W1, W2 = init_params(1, 1)
for epoch in range(epochs):
  # Forward pass with the current weights.
  H, O = forward(x, W1, W2)

  # Report the loss on every 100th epoch, measured before that epoch's update.
  # NOTE(review): the recorded output stays at 0.0049 from epoch 200 onwards;
  # possibly the clipped, fixed-size steps bounce around the optimum - confirm.
  if not epoch % 100:
    loss = MSE(y, O)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")

  # One gradient step using backward's default learning rate.
  W1, W2 = backward(x, y, W1, W2, H, O)

# Summary: learned weights, then predictions next to the targets.
print("\nFinal Weights:", f"W1: {W1!s}", f"W2: {W2!s}", sep="\n")
prediction = forward(x, W1, W2)[1]
print("\nPrediction:\n", prediction)
print("\ny True:\n", y)

Epoch 0, Loss: 113.9959
Epoch 100, Loss: 28.0720
Epoch 200, Loss: 0.0049
Epoch 300, Loss: 0.0049
Epoch 400, Loss: 0.0049
Epoch 500, Loss: 0.0049
Epoch 600, Loss: 0.0049
Epoch 700, Loss: 0.0049
Epoch 800, Loss: 0.0049
Epoch 900, Loss: 0.0049

Final Weights:
W1: [[1.41883523]]
W2: [[1.41886676]]

Prediction:
 [[ 0.        ]
 [ 2.01313815]
 [ 4.02627629]
 [ 6.03941444]
 [ 8.05255258]
 [10.06569073]
 [12.07882888]
 [14.09196702]
 [16.10510517]
 [18.11824332]]

y True:
 [[ 0]
 [ 2]
 [ 4]
 [ 6]
 [ 8]
 [10]
 [12]
 [14]
 [16]
 [18]]
