# Gradient Descent using Numpy

In [2]:
# Let's start by doing it manually, just using numpy
import numpy as np

X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

# now we need to caculate the model predcition and loss

# model prediction
def forward(x):
  return w * x

# loss = MSE
def loss(y, y_pred):
  return ((y_pred - y)**2).mean()

# gradient
# MSE = 1/N * (w*x - y)**2
# so dJ/dw = 1/N 2x (w*x - y)   -  y_pred = w*x
def gradient(x,y,y_pred):
  return np.dot(2*x, y_pred - y).mean() # .mean() needed for the 1/N


print(f'Prediction before training is f(5) = {forward(5):.3f}')

# Now let's start our training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
  # first we need the prediction (through the forward pass)
  y_pred = forward(X)

  # now we need the loss
  l = loss(Y, y_pred)

  # now we need the gradients w.r.t w
  dw = gradient(X,Y,y_pred)

  # now update the weights
  w -= learning_rate * dw

  if epoch % 2 == 0: # printing a message at every other epoch
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training is f(5) = {forward(5):.3f}')

# We see that with each training step, we are increasing our weights and decreasing our loss

Prediction before training is f(5) = 0.000
epoch 1: w = 1.200, loss = 30.00000000
epoch 3: w = 1.872, loss = 0.76800019
epoch 5: w = 1.980, loss = 0.01966083
epoch 7: w = 1.997, loss = 0.00050331
epoch 9: w = 1.999, loss = 0.00001288
epoch 11: w = 2.000, loss = 0.00000033
epoch 13: w = 2.000, loss = 0.00000001
epoch 15: w = 2.000, loss = 0.00000000
epoch 17: w = 2.000, loss = 0.00000000
epoch 19: w = 2.000, loss = 0.00000000
Prediction after training is f(5) = 10.000


# Gradient Descent using Torch

In [4]:
# Let's start by doing it manually, just using numpy
import torch

X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# now we need to caculate the model predcition and loss

# model prediction
def forward(x):
  return w * x

# loss = MSE
def loss(y, y_pred):
  return ((y_pred - y)**2).mean()


print(f'Prediction before training is f(5) = {forward(5):.3f}')

# Now let's start our training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
  # first we need the prediction (through the forward pass)
  y_pred = forward(X)

  # now we need the loss
  l = loss(Y, y_pred)

  # now we need to do the backward pass
  l.backward() # this will compute the gradient of our loss w.r.t 'w'


  with torch.no_grad():
    w -= learning_rate * w.grad  # we dont want this operation to be part of our gradient tracking graphs/computational graphs
  
  # make sure to zero the gradients
  w.grad.zero_()

  if epoch % 10 == 0: # printing a message at every other epoch
    print(f'epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training is f(5) = {forward(5):.3f}')

# We see that with each training step, we are increasing our weights and decreasing our loss

# with 20 iterations, we get "Prediction after training is f(5) = 9.612"
# but we see that it is not correct 
# this is because the backpropagation is not as exact as the numerical gradient computation

# by increasing our iterations from 20 to 100, we now get a correct prediction

# next we will replace our manual loss and weight updates with PyTorch loss and optimizer classes

Prediction before training is f(5) = 0.000
epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training is f(5) = 10.000
