# PyTorch Tutorial 05 - Gradient Descent with Autograd and Backpropagation

In [1]:
import numpy as np
import torch

# Numpy Implementation

In [11]:
# we use a linear regression as:
# f = w * x
# let: w = 2

X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0 # lets choose an initial weights for the start

# model prediction
def forward(x):
    return w * x

# calculate loss
def loss(y, y_predicted):
    return ((y_predicted - y)**2).mean()

# calculate gradients
# lets use MSE
# MSE = 1 / N * ( w*x -y )**2

# the derivative of our objective function
# dJ / dw = 1/N * (2x * (w*x - y))

def gradient(x, y, y_predicted):
    return np.dot(2*x, y_predicted - y).mean()

In [12]:
# Lets start our training

learning_rate = 0.01
n_iters = 10

print(f"Prediction before training: f(5) = {forward(5):.3f}")
print("******************\n")


for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # Loss
    l = loss(Y, y_pred)

    # gradients = backward pass
    dw = gradient(X, Y, y_pred)

    # update weights
    # Update formula in Gradient Descent formula is always minus. 
    # If it was Gradient Ascent, then it would be always plus

    w -= learning_rate * dw

    if epoch%1 == 0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {forward(5):.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
******************

epoch 1: w = 1.200, loss = 30.00000000
Prediction during training: f(5) = 6.000


epoch 2: w = 1.680, loss = 4.79999924
Prediction during training: f(5) = 8.400


epoch 3: w = 1.872, loss = 0.76800019
Prediction during training: f(5) = 9.360


epoch 4: w = 1.949, loss = 0.12288000
Prediction during training: f(5) = 9.744


epoch 5: w = 1.980, loss = 0.01966083
Prediction during training: f(5) = 9.898


epoch 6: w = 1.992, loss = 0.00314570
Prediction during training: f(5) = 9.959


epoch 7: w = 1.997, loss = 0.00050332
Prediction during training: f(5) = 9.984


epoch 8: w = 1.999, loss = 0.00008053
Prediction during training: f(5) = 9.993


epoch 9: w = 1.999, loss = 0.00001288
Prediction during training: f(5) = 9.997


epoch 10: w = 2.000, loss = 0.00000206
Prediction during training: f(5) = 9.999


******************

Prediction after training: f(5) = 9.999


In [15]:
X = np.array([1, 2, 3, 4], dtype=np.float32)
Y = np.array([2, 4, 6, 8], dtype=np.float32)

w = 0.0 # lets choose an initial weights for the start



# Lets start our training

learning_rate = 0.01
n_iters = 20

print(f"Prediction before training: f(5) = {forward(5):.3f}")
print("******************\n")


for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # Loss
    l = loss(Y, y_pred)

    # gradients = backward pass
    dw = gradient(X, Y, y_pred)

    # update weights
    # Update formula in Gradient Descent formula is always minus. 
    # If it was Gradient Ascent, then it would be always plus

    w -= learning_rate * dw

    if epoch%2 == 0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {forward(5):.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
******************

epoch 1: w = 1.200, loss = 30.00000000
Prediction during training: f(5) = 6.000


epoch 3: w = 1.872, loss = 0.76800019
Prediction during training: f(5) = 9.360


epoch 5: w = 1.980, loss = 0.01966083
Prediction during training: f(5) = 9.898


epoch 7: w = 1.997, loss = 0.00050332
Prediction during training: f(5) = 9.984


epoch 9: w = 1.999, loss = 0.00001288
Prediction during training: f(5) = 9.997


epoch 11: w = 2.000, loss = 0.00000033
Prediction during training: f(5) = 10.000


epoch 13: w = 2.000, loss = 0.00000001
Prediction during training: f(5) = 10.000


epoch 15: w = 2.000, loss = 0.00000000
Prediction during training: f(5) = 10.000


epoch 17: w = 2.000, loss = 0.00000000
Prediction during training: f(5) = 10.000


epoch 19: w = 2.000, loss = 0.00000000
Prediction during training: f(5) = 10.000


******************

Prediction after training: f(5) = 10.000


# Torch Implementation

In [16]:
# we use a linear regression as:
# f = w * x
# let: w = 2

# X = np.array([1, 2, 3, 4], dtype=np.float32)
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

# w must be tensor as well
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True) # lets choose an initial weights for the start

# model prediction
def forward(x):
    return w * x

# calculate loss
def loss(y, y_predicted):
    return ((y_predicted - y)**2).mean()

# calculate gradients
# lets use MSE
# MSE = 1 / N * ( w*x -y )**2

# the derivative of our objective function
# dJ / dw = 1/N * (2x * (w*x - y))

# we dont need gradient function in torch library
# def gradient(x, y, y_predicted):
#     return np.dot(2*x, y_predicted - y).mean()


In [17]:
# Lets start our training

learning_rate = 0.01
n_iters = 20

print(f"Prediction before training: f(5) = {forward(5):.3f}")
print("******************\n")

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # Loss
    l = loss(Y, y_pred)

    # gradients = backward pass
    # dw = gradient(X, Y, y_pred)  # we dont use this line of code in torch
    l.backward()  # this calculate dl/dw

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad  # instead of dw, we use w.grad to denote gradients of the given epoch

    # we must clear our memory of w to prevent accumulation of the w values
    w.grad.zero_()

    if epoch%2 == 0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {forward(5):.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {forward(5):.3f}")




Prediction before training: f(5) = 0.000
******************

epoch 1: w = 0.300, loss = 30.00000000
Prediction during training: f(5) = 1.500


epoch 3: w = 0.772, loss = 15.66018772
Prediction during training: f(5) = 3.859


epoch 5: w = 1.113, loss = 8.17471695
Prediction during training: f(5) = 5.563


epoch 7: w = 1.359, loss = 4.26725292
Prediction during training: f(5) = 6.794


epoch 9: w = 1.537, loss = 2.22753215
Prediction during training: f(5) = 7.684


epoch 11: w = 1.665, loss = 1.16278565
Prediction during training: f(5) = 8.327


epoch 13: w = 1.758, loss = 0.60698116
Prediction during training: f(5) = 8.791


epoch 15: w = 1.825, loss = 0.31684780
Prediction during training: f(5) = 9.126


epoch 17: w = 1.874, loss = 0.16539653
Prediction during training: f(5) = 9.369


epoch 19: w = 1.909, loss = 0.08633806
Prediction during training: f(5) = 9.544


******************

Prediction after training: f(5) = 9.612


In [22]:
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True) # lets choose an initial weights for the start

# Lets start our training

learning_rate = 0.01
n_iters = 100

print(f"Prediction before training: f(5) = {forward(5):.3f}")
print("******************\n")

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # Loss
    l = loss(Y, y_pred)

    # gradients = backward pass
    # dw = gradient(X, Y, y_pred)  # we dont use this line of code in torch
    l.backward()  # this calculate dl/dw

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad  # instead of dw, we use w.grad to denote gradients of the given epoch

    # we must clear our memory of w to prevent accumulation of the w values
    w.grad.zero_()

    if epoch%10 == 0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")
        print(f"Prediction during training: f(5) = {forward(5):.3f}")
        print("\n")

print("******************\n")
print(f"Prediction after training: f(5) = {forward(5):.3f}")




Prediction before training: f(5) = 0.000
******************

epoch 1: w = 0.300, loss = 30.00000000
Prediction during training: f(5) = 1.500


epoch 11: w = 1.665, loss = 1.16278565
Prediction during training: f(5) = 8.327


epoch 21: w = 1.934, loss = 0.04506890
Prediction during training: f(5) = 9.671


epoch 31: w = 1.987, loss = 0.00174685
Prediction during training: f(5) = 9.935


epoch 41: w = 1.997, loss = 0.00006770
Prediction during training: f(5) = 9.987


epoch 51: w = 1.999, loss = 0.00000262
Prediction during training: f(5) = 9.997


epoch 61: w = 2.000, loss = 0.00000010
Prediction during training: f(5) = 10.000


epoch 71: w = 2.000, loss = 0.00000000
Prediction during training: f(5) = 10.000


epoch 81: w = 2.000, loss = 0.00000000
Prediction during training: f(5) = 10.000


epoch 91: w = 2.000, loss = 0.00000000
Prediction during training: f(5) = 10.000


******************

Prediction after training: f(5) = 10.000
