In [40]:
# Understanding core basics first so everything will be done manually for now

import numpy as np

# f = w0 + w1*x1 + w2*x2 + ... + wn*xn

# f = 2 * x

x = np.array([1, 2, 3, 4, 5], dtype=np.float32)
y = np.array([2, 4, 6, 8, 10], dtype=np.float32)

w = np.random.rand(1)  # Random weight initialization

In [28]:
# model prediction
def forward(x):
    return w * x

In [29]:
# loss function - mean squared error
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()  # Mean Squared Error (MSE)

In [30]:
# gradient descent
# mse = 1/n * (w*x - y)^2
# dL/dw = 1/n * 2x(w*x - y)
def gradient(x, y, y_pred):
    return np.dot(2 * x, y_pred - y).mean()  # Gradient of the loss function

In [36]:
print(f"Initial weight: {w}, Initial loss: {loss(y, forward(x))}")
print(f"Prediction before training: f(5) = {forward(5)}")

Initial weight: [0.08369585], Initial loss: 40.3944374779815
Prediction before training: f(5) = [0.41847926]


In [None]:
# Training loop
learning_rate = 0.01
n_epochs = 20

for epoch in range(n_epochs):
    # Forward pass
    y_pred = forward(x)

    # Compute loss
    l = loss(y, y_pred)

    # Compute gradients
    dw = gradient(x, y, y_pred)

    # Update weights
    w -= learning_rate * dw

    if epoch % 2 == 0:
        print(f"Epoch {epoch}, Weight: {w}, Loss: {l}")

Epoch 0, Weight: 2.0, Loss: 0.0
Epoch 2, Weight: 2.0, Loss: 0.0
Epoch 4, Weight: 2.0, Loss: 0.0
Epoch 6, Weight: 2.0, Loss: 0.0
Epoch 8, Weight: 2.0, Loss: 0.0
Epoch 10, Weight: 2.0, Loss: 0.0
Epoch 12, Weight: 2.0, Loss: 0.0
Epoch 14, Weight: 2.0, Loss: 0.0
Epoch 16, Weight: 2.0, Loss: 0.0
Epoch 18, Weight: 2.0, Loss: 0.0


In [24]:
print(f"Prediction after training: f(5) = {forward(5)}")
print(f"Final weight: {w}, Final loss: {loss(y, forward(x))}")

Prediction after training: f(5) = [10.]
Final weight: [2.], Final loss: 0.0


In [71]:
# Now we will use pytorch to do the same thing
import torch

x = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float32)
y = torch.tensor([2, 4, 6, 8, 10], dtype=torch.float32)

# Random weight initialization
w = torch.rand(1, dtype=torch.float32, requires_grad=True)

# tensor weight initialization to 0.0 - for some reason this doesn't work properly. the gradient never goes to 2, it stays around 1.99999...
# w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


In [72]:
# model prediction
def forward(x):
    return w * x


# loss function - mean squared error
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()  # Mean Squared Error (MSE)


In [73]:
print(f"Initial weight: {w}, Initial loss: {loss(y, forward(x))}")
print(f"Prediction before training: f(5) = {forward(5)}")

Initial weight: tensor([0.1110], requires_grad=True), Initial loss: 39.249542236328125
Prediction before training: f(5) = tensor([0.5552], grad_fn=<MulBackward0>)


In [74]:
# Training loop
learning_rate = 0.01
n_epochs = 50

for epoch in range(n_epochs):
    # Forward pass
    y_pred = forward(x)

    # Compute loss
    l = loss(y, y_pred)

    # Compute gradients using backward pass
    l.backward()  # This computes the gradients of the loss w.r.t.(with respect to) the weights

    # Update weights
    with torch.no_grad():
        w -= learning_rate * w.grad

    # Zero the gradients after updating weights
    w.grad.zero_()

    # Print loss and weight every 2 epochs
    if epoch % 2 == 0:
        print(f"Epoch {epoch}, Weight: {w}, Loss: {l}")


Epoch 0, Weight: tensor([0.5266], requires_grad=True), Loss: 39.249542236328125
Epoch 2, Weight: tensor([1.1036], requires_grad=True), Loss: 14.528242111206055
Epoch 4, Weight: tensor([1.4546], requires_grad=True), Loss: 5.377638339996338
Epoch 6, Weight: tensor([1.6682], requires_grad=True), Loss: 1.9905364513397217
Epoch 8, Weight: tensor([1.7981], requires_grad=True), Loss: 0.7367981672286987
Epoch 10, Weight: tensor([1.8772], requires_grad=True), Loss: 0.2727263569831848
Epoch 12, Weight: tensor([1.9253], requires_grad=True), Loss: 0.1009499579668045
Epoch 14, Weight: tensor([1.9545], requires_grad=True), Loss: 0.037366557866334915
Epoch 16, Weight: tensor([1.9723], requires_grad=True), Loss: 0.013831275515258312
Epoch 18, Weight: tensor([1.9832], requires_grad=True), Loss: 0.005119616165757179
Epoch 20, Weight: tensor([1.9898], requires_grad=True), Loss: 0.0018949974328279495
Epoch 22, Weight: tensor([1.9938], requires_grad=True), Loss: 0.0007014230359345675
Epoch 24, Weight: tens

In [75]:
print(f"Prediction after training: f(5) = {forward(5)}")
print(f"Final weight: {w}, Final loss: {loss(y, forward(x))}")


Prediction after training: f(5) = tensor([10.0000], grad_fn=<MulBackward0>)
Final weight: tensor([2.0000], requires_grad=True), Final loss: 6.402842700481415e-10
