In [1]:
import numpy as np

In [54]:
np.random.seed(42)

# Goal y = 2x
x = np.array([np.arange(0, 10)]).T
y = 2 * x

# Initialize params, has 1 hidden layers with 1 neurons, no bias
def init_params(input_shape, output_shape):
  W1 = np.random.rand(input_shape, input_shape) * 0.01
  W2 = np.random.rand(input_shape, output_shape) * 0.01
  return W1, W2

# Forward propagation
def forward(x, W1, W2):
  hidden =  x @ W1
  output = hidden @ W2
  return hidden, output

# MSE loss
def MSE(y_true, y_pred):
  return np.mean(np.square(y_true-y_pred))

# Backward propagation
def backward(x, y, W1, W2, H, O, lr=0.01):
  dL_dO = 2 * (O - y) / O.shape[0]
  
  dO_dW2 = H
  dL_dW2 = dO_dW2.T @ dL_dO
  
  dO_dH = W2
  dH_dW1 = x
  dL_dW1 = dH_dW1.T @ (dL_dO @ dO_dH.T)

  W2 -= lr * dL_dW2
  W1 -= lr * dL_dW1

  return W1, W2

# Training loop
epochs = 1000
W1, W2 = init_params(1, 1)
for epoch in range(epochs):
  H, O = forward(x, W1, W2)

  if epoch % 100 == 0:
    loss = MSE(y, O)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")

  W1, W2 = backward(x, y, W1, W2, H, O)

print("\nFinal Weights:")
print("W1:", W1)
print("W2:", W2)
print("\nPrediction:\n", forward(x, W1, W2)[1])
print("\ny True:\n", y)

Epoch 0, Loss: 113.9959
Epoch 100, Loss: 8.4656
Epoch 200, Loss: 8.4656
Epoch 300, Loss: 8.4656
Epoch 400, Loss: 8.4656
Epoch 500, Loss: 8.4656
Epoch 600, Loss: 8.4656
Epoch 700, Loss: 8.4656
Epoch 800, Loss: 8.4656
Epoch 900, Loss: 8.4656

Final Weights:
W1: [[1.59530964]]
W2: [[1.59530964]]

Prediction:
 [[ 0.        ]
 [ 2.54501285]
 [ 5.09002571]
 [ 7.63503856]
 [10.18005141]
 [12.72506427]
 [15.27007712]
 [17.81508997]
 [20.36010282]
 [22.90511568]]

y True:
 [[ 0]
 [ 2]
 [ 4]
 [ 6]
 [ 8]
 [10]
 [12]
 [14]
 [16]
 [18]]


In [53]:
np.random.seed(42)

# Goal y = 2x + 3
x = np.array([np.arange(0, 10)]).T
y = 2 * x + 3

# Initialize params, has 1 hidden layers with 1 neurons, with bias
def init_params(input_shape, output_shape):
  W1 = np.random.rand(input_shape, input_shape) * 0.01
  b1 = np.random.random()
  W2 = np.random.rand(input_shape, output_shape) * 0.01
  b2 = np.random.random()
  return W1, b1, W2, b2

# Forward propagation
def forward(x, W1, b1, W2, b2):
  hidden =  x @ W1 + b1
  output = hidden @ W2 + b2
  return hidden, output

# MSE loss
def MSE(y_true, y_pred):
  return np.mean(np.square(y_true-y_pred))

# Backward propagation
def backward(x, y, W1, b1, W2, b2, H, O, lr=0.01):
  dL_dO = 2 * (O - y) / O.shape[0]

  dL_db2 = np.sum(dL_dO)
  
  dO_dW2 = H
  dL_dW2 = dO_dW2.T @ dL_dO
  
  dO_dH = W2
  dL_dH = dL_dO @ dO_dH.T
  dL_db1 = np.sum(dL_dH)

  dH_dW1 = x
  dL_dW1 = dH_dW1.T @ (dL_dH)

  b2 -= lr * dL_db2
  W2 -= lr * dL_dW2
  b1 -= lr * dL_db1
  W1 -= lr * dL_dW1

  return W1, b1, W2, b2

# Training loop
epochs = 1000
W1, b1, W2, b2 = init_params(1, 1)
for epoch in range(epochs):
  H, O = forward(x, W1, b1, W2, b2)

  if epoch % 100 == 0:
    loss = MSE(y, O)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")

  W1, b1, W2, b2 = backward(x, y, W1, b1, W2, b2, H, O)

print("\nFinal Params:")
print(f"W1: {W1} | b1: {b1}")
print(f"W2: {W2} | b2: {b2}")
print("\nPrediction:\n", forward(x, W1, b1, W2, b2)[1])
print("\ny True:\n", y)

Epoch 0, Loss: 162.8282
Epoch 100, Loss: 10.3533
Epoch 200, Loss: 7.2105
Epoch 300, Loss: 5.1797
Epoch 400, Loss: 3.7506
Epoch 500, Loss: 2.7163
Epoch 600, Loss: 1.9633
Epoch 700, Loss: 1.4154
Epoch 800, Loss: 1.0179
Epoch 900, Loss: 0.7302

Final Params:
W1: [[1.54516413]] | b1: -1.2619867906488234
W2: [[1.38274399]] | b2: 4.737832771104995

Prediction:
 [[ 2.99282812]
 [ 5.12939453]
 [ 7.26596094]
 [ 9.40252735]
 [11.53909375]
 [13.67566016]
 [15.81222657]
 [17.94879298]
 [20.08535939]
 [22.22192579]]

y True:
 [[ 3]
 [ 5]
 [ 7]
 [ 9]
 [11]
 [13]
 [15]
 [17]
 [19]
 [21]]
