# backpropagation

## forward pass: compute loss
## compute local gradients
## backward pass compute dloss/dweights using the chain rule 

In [1]:
import torch

In [21]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

#backward pass and compute the loss
y_hat = w * x
loss = (y_hat - y)**2

print(loss)

#backward pass
loss.backward()
print(w.grad)

#update our weight

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


In [3]:
import numpy as np

In [22]:
# f = w * x
# f = 2 * x
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)
w = 0.0

#model prediction
def forward(x):
    return w * x
#loss
def loss(y, y_predicted):
    return ((y_predicted - y)**2).mean()
#gradients
#MSE = 1/N * (w*x - y)**2
#dJ/dw = 1/N 2x(w*x-y)
def gradient(x,y,y_predicted):
    return np.dot(2*x, y_predicted-y).mean()
    
print(f'Prediction before training:  f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 20
for epoch in range(n_iters):
    y_pred = forward(X)
    l = loss(Y,y_pred)
    dw = gradient(X,Y,y_pred)
    #core of gradient descent: we need to update the weight to counter-direction of gradient(dw)
    #when dw is +, it means the more dw, the mors loss -> w -= learning_rate * dw is decresing the w
    #when dw is -, it means the more dw, the less loss -> w -= learning_rate * dw is increasing the w
    w -= learning_rate * dw
    if epoch % 1 ==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')
print(f'Prediction after training: f(5) = {forward(5):.3f}')

Prediction before training:  f(5) = 0.000
epoch 1: w = 1.200, loss = 30.00000000
epoch 2: w = 1.680, loss = 4.79999924
epoch 3: w = 1.872, loss = 0.76800019
epoch 4: w = 1.949, loss = 0.12288000
epoch 5: w = 1.980, loss = 0.01966083
epoch 6: w = 1.992, loss = 0.00314570
epoch 7: w = 1.997, loss = 0.00050332
epoch 8: w = 1.999, loss = 0.00008053
epoch 9: w = 1.999, loss = 0.00001288
epoch 10: w = 2.000, loss = 0.00000206
epoch 11: w = 2.000, loss = 0.00000033
epoch 12: w = 2.000, loss = 0.00000005
epoch 13: w = 2.000, loss = 0.00000001
epoch 14: w = 2.000, loss = 0.00000000
epoch 15: w = 2.000, loss = 0.00000000
epoch 16: w = 2.000, loss = 0.00000000
epoch 17: w = 2.000, loss = 0.00000000
epoch 18: w = 2.000, loss = 0.00000000
epoch 19: w = 2.000, loss = 0.00000000
epoch 20: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


In [20]:
# f = w * x
# f = 2 * x
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)
w = torch.tensor(0.0, dtype=torch.float32, requires_grad = True)

#model prediction
def forward(x):
    return w * x
#loss
def loss(y, y_predicted):
    return ((y_predicted - y)**2).mean()
#gradients
#MSE = 1/N * (w*x - y)**2
#dJ/dw = 1/N 2x(w*x-y)
def gradient(x,y,y_predicted):
    return np.dot(2*x, y_predicted-y).mean()
    
print(f'prediction before training:  f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 100
for epoch in range(n_iters):
    y_pred = forward(X)
    
    l = loss(Y,y_pred)

    #l.backward()
    #w.grad
    #dl/dw
    l.backward()
    with torch.no_grad():
        w -= learning_rate * w.grad
    #zero gradient: clean previous gradient data
    w.grad.zero_()
    
    if epoch % 10 ==0:
        print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')
print(f'Prediction after training: f(5) = {forward(5):.3f}')

prediction before training:  f(5) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000
