# Gradient Descent using Autograd

**1st Step**
- Prediction --> Manual
- Gradients Computation --> Manual
- Loss Computation --> Manual
- Parameter Updates --> Manual

**2nd Step**
- Prediction --> Manual
- Gradients Computation --> autograd
- Loss Computation --> Manual
- Parameter Updates --> Manual

**3rd Step**
- Prediction --> Manual
- Gradients Computation --> autograd
- Loss Computation --> pytorch Loss
- Parameter Updates --> pytorch Optimizer

**4th Step**
- Prediction --> pytorch model
- Gradients Computation --> pytorch autograd
- Loss Computation --> pytorch Loss
- Parameter Updates --> pytorch Optimizer

## Step 1

In [6]:
import numpy as np

# Linear regression without a bias term:
#   f(x) = w * x
# The training data below follows f(x) = 2 * x.

# Training inputs and their targets.
X = np.asarray([1.0, 2.0, 3.0, 4.0], dtype=np.float32)
Y = np.asarray([2.0, 4.0, 6.0, 8.0], dtype=np.float32)

# Initial weight.
w = 0.0

# model prediction
def forward(x):
    """Return the model prediction w * x, using the module-level weight w."""
    prediction = w * x
    return prediction

# loss = MSE 
def loss(y, y_predicted):
    """Return the mean squared error between y_predicted and y."""
    squared_error = (y_predicted - y) ** 2
    return squared_error.mean()

# gradient
# MSE  J = 1/N * sum((w*x - y)**2)
# dJ/dw  = 1/N * sum(2*x * (w*x - y))
def gradient(x, y, y_predicted):
    """Return the gradient signal used to update the weight w.

    Args:
        x: training inputs.
        y: training targets.
        y_predicted: model predictions for ``x``.

    Returns:
        The scalar ``sum(2 * x * (y_predicted - y))`` for 1-D inputs.
    """
    # Depend only on the arguments: the predictions come from the
    # y_predicted parameter, not from a module-level variable.
    #
    # NOTE(review): for 1-D inputs np.dot already sums over the samples, so
    # the trailing .mean() acts on a scalar and does not divide by N. The
    # result is therefore N times dJ/dw from the formula above. The scale is
    # kept because the training loop's learning rate (0.01) and the recorded
    # results were produced with it.
    return np.dot(2 * x, y_predicted - y).mean()


# Prediction for x = 5 with the weight as it is before the loop below runs.
print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training: repeated gradient-descent updates of the single weight w.
learning_rate = 0.01
n_iters = 20
for epoch in range(n_iters):
    
    # predictions = forward pass
    y_pred = forward(X)
    
    # loss (computed with the weight from before this iteration's update)
    l = loss(Y, y_pred)
    
    # gradients
    dw = gradient(X,Y,y_pred)
    
    # update weights: step against the gradient, scaled by the learning rate
    w = w - learning_rate * dw
    
    # Report every second iteration. The printed w is the updated weight,
    # while the printed loss was measured before the update.
    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss={l:.8f}')

# Prediction for x = 5 with the trained weight.
print(f'Prediction after training : f(5) = {forward(5):.3f}')

Prediction before training : f(5) = 0.000
epoch 1: w = 1.200, loss=30.00000000
epoch 3: w = 1.872, loss=0.76800019
epoch 5: w = 1.980, loss=0.01966083
epoch 7: w = 1.997, loss=0.00050331
epoch 9: w = 1.999, loss=0.00001288
epoch 11: w = 2.000, loss=0.00000033
epoch 13: w = 2.000, loss=0.00000001
epoch 15: w = 2.000, loss=0.00000000
epoch 17: w = 2.000, loss=0.00000000
epoch 19: w = 2.000, loss=0.00000000
Prediction after training : f(5) = 10.000


In [7]:
# Next, let's replace the manual gradient computation with PyTorch's autograd

## Step 2

In [16]:
import torch

# Training inputs and their targets.
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)

# Scalar weight; requires_grad=True makes autograd track operations on it.
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)

# model prediction
def forward(x):
    """Return the model prediction w * x, using the module-level weight w."""
    prediction = w * x
    return prediction

# loss = MSE 
def loss(y, y_predicted):
    """Return the mean squared error between y_predicted and y."""
    squared_error = (y_predicted - y) ** 2
    return squared_error.mean()


# Prediction for x = 5 with the weight as it is before the loop below runs.
print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training: same gradient-descent loop, but the gradient comes from autograd.
learning_rate = 0.1
n_iters = 20
for epoch in range(n_iters):
    
    # predictions = forward pass
    y_pred = forward(X)
    
    # loss (computed with the weight from before this iteration's update)
    l = loss(Y, y_pred)
    
    # gradients = backward pass
    l.backward() # this will calculate dL/dW
    
    # update weights
    # The in-place update runs under no_grad so that it is not recorded
    # by autograd as part of the computation graph.
    with torch.no_grad():
        w -= learning_rate * w.grad
    
    # zero gradients
    # backward() adds to w.grad, so it is reset before the next iteration.
    w.grad.zero_()
    
    # Report every second iteration. The printed w is the updated weight,
    # while the printed loss was measured before the update.
    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss={l:.8f}')

# Prediction for x = 5 with the trained weight.
print(f'Prediction after training : f(5) = {forward(5):.3f}')

Prediction before training : f(5) = 0.000
epoch 1: w = 3.000, loss=30.00000000
epoch 3: w = 2.250, loss=1.87500000
epoch 5: w = 2.062, loss=0.11718750
epoch 7: w = 2.016, loss=0.00732422
epoch 9: w = 2.004, loss=0.00045776
epoch 11: w = 2.001, loss=0.00002861
epoch 13: w = 2.000, loss=0.00000179
epoch 15: w = 2.000, loss=0.00000011
epoch 17: w = 2.000, loss=0.00000001
epoch 19: w = 2.000, loss=0.00000000
Prediction after training : f(5) = 10.000


## Step 3

In [19]:
# General training pipeline in PyTorch
# 1) Design the model (input size, output size, forward pass)
# 2) Construct the loss and the optimizer
# 3) Training loop
    # - forward pass: compute the prediction and the loss
    # - backward pass: compute the gradients
    # - update the weights

import torch
import torch.nn as nn

# Training inputs and their targets.
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.0, 4.0, 6.0, 8.0], dtype=torch.float32)

# Scalar weight; requires_grad=True makes autograd track operations on it.
w = torch.tensor(0.0, requires_grad=True, dtype=torch.float32)

# model prediction
def forward(x):
    """Return the model prediction w * x, using the module-level weight w."""
    prediction = w * x
    return prediction

# Prediction for x = 5 with the weight as it is before the loop below runs.
print(f'Prediction before training : f(5) = {forward(5):.3f}')

# Training: the loss and the parameter update now come from PyTorch.
learning_rate = 0.1
n_iters = 20
loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)
for epoch in range(n_iters):

    # predictions = forward pass
    y_pred = forward(X)

    # loss: nn.MSELoss takes (input, target), so the prediction goes first
    l = loss(y_pred, Y)

    # gradients = backward pass
    l.backward()  # this will calculate dL/dW

    # update weights
    optimizer.step()

    # zero gradients (backward() adds to the stored gradients)
    optimizer.zero_grad()

    # Report every second iteration. The printed w is the updated weight,
    # while the printed loss was measured before the update.
    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss={l:.8f}')

# Prediction for x = 5 with the trained weight.
print(f'Prediction after training : f(5) = {forward(5):.3f}')

Prediction before training : f(5) = 0.000
epoch 1: w = 3.000, loss=30.00000000
epoch 3: w = 2.250, loss=1.87500000
epoch 5: w = 2.062, loss=0.11718750
epoch 7: w = 2.016, loss=0.00732422
epoch 9: w = 2.004, loss=0.00045776
epoch 11: w = 2.001, loss=0.00002861
epoch 13: w = 2.000, loss=0.00000179
epoch 15: w = 2.000, loss=0.00000011
epoch 17: w = 2.000, loss=0.00000001
epoch 19: w = 2.000, loss=0.00000000
Prediction after training : f(5) = 10.000


## Step 4

In [30]:
# General training pipeline in PyTorch
# 1) Design the model (input size, output size, forward pass)
# 2) Construct the loss and the optimizer
# 3) Training loop
    # - forward pass: compute the prediction and the loss
    # - backward pass: compute the gradients
    # - update the weights

import torch
import torch.nn as nn

# Training examples as 2-D tensors: one row per sample, one column per
# feature (nn.Linear treats the last dimension as the feature dimension).
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)

# Single sample used to check the prediction before and after training.
X_test = torch.tensor([5], dtype=torch.float32)

# The same data could also be built from 1-D tensors and reshaped:
# X = torch.tensor([1,2,3,4], dtype=torch.float32)
# Y = torch.tensor([2,4,6,8], dtype=torch.float32)

# X = X.reshape(-1,1)
# Y = Y.reshape(-1,1)

n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
# The output width is defined by the targets, not by the number of input
# features; both happen to be 1 for this data set.
output_size = Y.shape[1]

model = nn.Linear(input_size, output_size)

# Alternatively, the same model can be defined as a custom nn.Module.
class LinearRegression(nn.Module):
    """Linear model that wraps a single nn.Linear layer."""

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only layer of the model.
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to x and return the result."""
        prediction = self.lin(x)
        return prediction


model = LinearRegression(input_size, output_size)


# Prediction for x = 5 with the freshly initialised (untrained) model.
print(f'Prediction before training : f(5) = {model(X_test).item():.3f}')

# Training: model, loss and optimizer all come from PyTorch.
learning_rate = 0.1
n_iters = 50
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(n_iters):

    # predictions = forward pass
    y_pred = model(X)

    # loss: nn.MSELoss takes (input, target), so the prediction goes first
    l = loss(y_pred, Y)

    # gradients = backward pass
    l.backward()  # this will calculate dL/dW

    # update weights
    optimizer.step()

    # zero gradients (backward() adds to the stored gradients)
    optimizer.zero_grad()

    # Report every fifth iteration. The printed w is the updated weight,
    # while the printed loss was measured before the update.
    if epoch % 5 == 0:
        w, b = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss={l:.8f}')

# Prediction for x = 5 with the trained model.
print(f'Prediction after training : f(5) = {model(X_test).item():.3f}')

4 1
Prediction before training : f(5) = -3.092
epoch 1: w = 3.646, loss=56.04183197
epoch 6: w = 1.780, loss=1.02249122
epoch 11: w = 2.032, loss=0.01866103
epoch 16: w = 1.998, loss=0.00034471
epoch 21: w = 2.002, loss=0.00000940
epoch 26: w = 2.001, loss=0.00000247
epoch 31: w = 2.001, loss=0.00000174
epoch 36: w = 2.001, loss=0.00000128
epoch 41: w = 2.001, loss=0.00000095
epoch 46: w = 2.001, loss=0.00000070
Prediction after training : f(5) = 10.001
