In [None]:
"""
Here we will see how we optimise our model using automatic gradient computation, using the PyTorch
autograd package
First we will implement the linear regression model form scratch (doing every step manually)
    > Prediciton - manually
    > Gradient computation - manually
    > Loss computation - manually
    > Parameter updates - manually
    
Then, we will see how we can do this using PyTorch
    > Prediciton - PyTorch Model
    > Gradients computation - Autograd
    > Loss computation - PyTorch Loss
    > Parameter updates - PyTorch Optimizer
    
So basically, PyTorch can do most of our work for us, but we still need to know what loss and 
omptimzer to use
"""

In [1]:
# In this video, we will do the manual prediction and the autograd gradient computation

In [None]:
import numpy as np
import torch

In [8]:
# All Done manually first

# f = w =* x
# f = 2 * x (w=2)
# We are taking the real value of W to be 2???

# Training samples
X = np.array([1, 2, 3, 4], dtype=np.float32) # Training input
Y = np.array([2, 4, 6, 8], dtype=np.float32) # 2 * x, Training output

w = 0.0

# Model Prediciton
def forward(x):
    # Forward pass to follow the conventions of pytorch
    return w * x

# Loss = mean squared error
def loss(y, y_predicted):
    # y_predicted = model output
    return (((y_predicted - y)**2).mean())

# Gradient - calcualate gradient of the loss with respect to our parameters
# MSE (mean squared error) = 1 / N * ((w * x)  - y)**2
# = 1/N * (prediction - actual value) squared
# Derivative of above = dJ/dw = 1/N * 2x * (w*x - y)
# J = objecitve function

def gradient(x, y, y_predicted):
    # np.dot = top product
    # Below is the derivative formula shown above
    return np.dot( 2 * x, y_predicted - y).mean()


# Main program
print(f'Prediction before training: f(5) =  {forward(5):.3f}') # f(5) should be 10

# Training

learning_rate = 0.01
n_iters = 20 # Number of iteration

# Training loop

for epoch in range(n_iters):
    # Prediciton = forward pass
    y_pred = forward(X)
    
    # Loss
    l = loss(Y, y_pred)
    
    # Gradients
    dw = gradient(X, Y, y_pred)
    
    # Update weights
    w -= learning_rate * dw
    # This is the update formula for gradient descent
    # Our weight, W, goes in the negative direction of the learning rate * the gradient
    
    if(epoch % 2 == 0):
        print(f'Epoch {epoch  + 1}: w = {w:.3f}, loss = {l:.8f}')
    
print(f'Prediction after training: f(5) =  {forward(5):.3f}')

Prediction before training: f(5) =  0.000
Epoch 1: w = 1.200, loss = 30.00000000
Epoch 3: w = 1.872, loss = 0.76800019
Epoch 5: w = 1.980, loss = 0.01966083
Epoch 7: w = 1.997, loss = 0.00050331
Epoch 9: w = 1.999, loss = 0.00001288
Epoch 11: w = 2.000, loss = 0.00000033
Epoch 13: w = 2.000, loss = 0.00000001
Epoch 15: w = 2.000, loss = 0.00000000
Epoch 17: w = 2.000, loss = 0.00000000
Epoch 19: w = 2.000, loss = 0.00000000
Epoch 21: w = 2.000, loss = 0.00000000
Epoch 23: w = 2.000, loss = 0.00000000
Epoch 25: w = 2.000, loss = 0.00000000
Epoch 27: w = 2.000, loss = 0.00000000
Epoch 29: w = 2.000, loss = 0.00000000
Epoch 31: w = 2.000, loss = 0.00000000
Epoch 33: w = 2.000, loss = 0.00000000
Epoch 35: w = 2.000, loss = 0.00000000
Epoch 37: w = 2.000, loss = 0.00000000
Epoch 39: w = 2.000, loss = 0.00000000
Epoch 41: w = 2.000, loss = 0.00000000
Epoch 43: w = 2.000, loss = 0.00000000
Epoch 45: w = 2.000, loss = 0.00000000
Epoch 47: w = 2.000, loss = 0.00000000
Epoch 49: w = 2.000, loss 

In [11]:
# Let's do this again, but replace the gradient calculations with autograd

# f = w =* x
# f = 2 * x (w=2)
# We are taking the real value of W to be 2???

# Training samples
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32) # Training input
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32) # 2 * x, Training output

# w also has to be a tensor in this model
# Since we are interested in the gradient of our loss with respect ot w, we need to specify
# ... that w requires the gradient calculattionn
w = torch.tensor(0.0, dtype=torch.float32, requires_grad = True) # Initially 0

# Model Prediciton
def forward(x):
    # Forward pass to follow the conventions of pytorch
    return w * x

# Loss = mean squared error
def loss(y, y_predicted):
    # y_predicted = model output
    return (((y_predicted - y)**2).mean())

# Remove manually computed gradient

# Main program
print(f'Prediction before training: f(5) =  {forward(5):.3f}') # f(5) should be 10

# Training

learning_rate = 0.01
n_iters = 20 # Number of iteration

# Training loop

for epoch in range(n_iters):
    # Prediciton = forward pass
    y_pred = forward(X)
    
    # Loss
    l = loss(Y, y_pred)
    
    # Gradients = backward pass (remember: back propogation)
    l.backward() # Calculates dLoss/dWeights (dl/dw)
    # PyTorch does all the calculations for us
    
    # Update weights
    """
    When using PyTorch, we cannot have this operation in our gradient tracking graph
        > Cannot be part of the computation graph
        > We need to wrap it in torch.no_grad
    """
    with torch.no_grad():
        w -= learning_rate * w.grad
    # This is the update formula for gradient descent
    # Our weight, W, goes in the negative direction of the learning rate * the gradient
    """
    We also need to zero our gradients, because otherwise l.backward() will keep accumulating
    the w.grad attribute
    """
    w.grad.zero_()
    
    if(epoch % 2 == 0):
        print(f'Epoch {epoch  + 1}: w = {w:.3f}, loss = {l:.8f}')
    
print(f'Prediction after training: f(5) =  {forward(5):.3f}')
"""
It ends up not as good as the one above becaus the back propogation is not as exact as the numerical
gradient compuation
"""

Prediction before training: f(5) =  0.000
Epoch 1: w = 0.300, loss = 30.00000000
Epoch 3: w = 0.772, loss = 15.66018772
Epoch 5: w = 1.113, loss = 8.17471695
Epoch 7: w = 1.359, loss = 4.26725292
Epoch 9: w = 1.537, loss = 2.22753215
Epoch 11: w = 1.665, loss = 1.16278565
Epoch 13: w = 1.758, loss = 0.60698116
Epoch 15: w = 1.825, loss = 0.31684780
Epoch 17: w = 1.874, loss = 0.16539653
Epoch 19: w = 1.909, loss = 0.08633806
Prediction after training: f(5) =  9.612
