# Gradient Descent

1.  Prediction: Manually\
    Gradients Computation: Manually\
    Loss Computation: Manually\
    Parameter Updates: Manually

In [1]:
import numpy as np

In [2]:
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

Model Prediction

In [3]:
def forward_pass(x):
    return w*x

In [4]:
# Loss: MSE
def loss(y, y_pred):
    return(((y_pred-y)**2).mean())

In [5]:
# Gradient
# MSE = 1/n * (w*x - y)^2
# dJ/dw = 1/n * 2*x*(w*x - y)

def gradient(x,y,y_pred):
    return np.dot(2*x,y_pred-y).mean()

In [6]:
print(f"Prediction before training: f(5) = {forward_pass(5):.3f}")

Prediction before training: f(5) = 0.000


In [7]:
# Training
learning_rate = 0.01
n_iters = 20

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward_pass(X)

    #loss
    l = loss(Y, y_pred)

    #gradients
    dw = gradient(X,Y,y_pred)

    # update weights
    w -= learning_rate * dw

    if epoch % 2==0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")

print(f"Prediction after training: f(5) = {forward_pass(5):.3f}")

epoch 1: w = 1.200, loss = 30.00000000
epoch 3: w = 1.872, loss = 0.76800019
epoch 5: w = 1.980, loss = 0.01966083
epoch 7: w = 1.997, loss = 0.00050332
epoch 9: w = 1.999, loss = 0.00001288
epoch 11: w = 2.000, loss = 0.00000033
epoch 13: w = 2.000, loss = 0.00000001
epoch 15: w = 2.000, loss = 0.00000000
epoch 17: w = 2.000, loss = 0.00000000
epoch 19: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


2.  Prediction: Manually\
    Gradients Computation: Autograd\
    Loss Computation: Manually\
    Parameter Updates: Manually

In [8]:
import torch
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [9]:
# Forward Prop and Loss Function are the same as before
# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward_pass(X)

    #loss
    l = loss(Y, y_pred)

    #gradients = backward pass
    l.backward() # dl/dw

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    
    # zero gradients
    w.grad.zero_()    # to prevent accumulation of prev values in grad in w

    if epoch % 10==0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")

print(f"Prediction after training: f(5) = {forward_pass(5):.3f}")

epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


### Training Pipeline
    i) Design model (input, output size, forward pass with layers)
    ii) Construct loss and optimizer
    iii) Training loop
        - Forward pass: compute prediction
        - Backward Pass: Gradients
        - Update Weights

3.  Prediction: Manually\
    Gradients Computation: Autograd\
    Loss Computation: PyTorch Loss\
    Parameter Updates: PyTorch Optimizer

In [10]:
import torch.nn as nn

learning_rate = 0.01
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward_pass(X)

    #loss
    l = loss(Y, y_pred)

    #gradients = backward pass
    l.backward() # dl/dw

    # update weights
    optimizer.step() # Does an optimization step
    
    # zero gradients
    optimizer.zero_grad()    # to prevent accumulation of prev values in grad in w

    if epoch % 10==0:
        print(f"epoch {epoch+1}: w = {w:.3f}, loss = {l:.8f}")

print(f"Prediction after training: f(5) = {forward_pass(5):.3f}")

epoch 1: w = 2.000, loss = 0.00000000
epoch 11: w = 2.000, loss = 0.00000000
epoch 21: w = 2.000, loss = 0.00000000
epoch 31: w = 2.000, loss = 0.00000000
epoch 41: w = 2.000, loss = 0.00000000
epoch 51: w = 2.000, loss = 0.00000000
epoch 61: w = 2.000, loss = 0.00000000
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
Prediction after training: f(5) = 10.000


4.  Prediction: PyTorch Model\
    Gradients Computation: Autograd\
    Loss Computation: PyTorch Loss\
    Parameter Updates: PyTorch Optimizer

In [11]:
# No need of weights(w) and forward_pass()

X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32) # 2D array where each row is a sample(data point) 
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = X.shape
print(n_samples, n_features)

4 1


In [12]:
input_size= n_features
output_size = n_features

model = nn.Linear(input_size, output_size)

print(f"Prediction before training: f(5) = {model(X_test).item():.3f}")

Prediction before training: f(5) = 0.236


In [17]:
learning_rate = 0.01
n_iters = 200

optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    #loss
    l = loss(Y, y_pred)

    #gradients = backward pass
    l.backward() # dl/dw

    # update weights
    optimizer.step() # Does an optimization step
    
    # zero gradients
    optimizer.zero_grad()    # to prevent accumulation of prev values in grad in w

    if epoch % 10==0:
        [w, b] = model.parameters()
        print(f"epoch {epoch+1}: w = {w[0][0].item():.3f}, loss = {l:.8f}")

print(f"Prediction after training: f(5) = {model(X_test).item():.3f}")

epoch 1: w = 1.954, loss = 0.00309146
epoch 11: w = 1.955, loss = 0.00291152
epoch 21: w = 1.957, loss = 0.00274206
epoch 31: w = 1.958, loss = 0.00258245
epoch 41: w = 1.959, loss = 0.00243214
epoch 51: w = 1.960, loss = 0.00229058
epoch 61: w = 1.961, loss = 0.00215726
epoch 71: w = 1.963, loss = 0.00203169
epoch 81: w = 1.964, loss = 0.00191343
epoch 91: w = 1.965, loss = 0.00180206
epoch 101: w = 1.966, loss = 0.00169717
epoch 111: w = 1.967, loss = 0.00159840
epoch 121: w = 1.968, loss = 0.00150536
epoch 131: w = 1.969, loss = 0.00141774
epoch 141: w = 1.970, loss = 0.00133522
epoch 151: w = 1.971, loss = 0.00125751
epoch 161: w = 1.971, loss = 0.00118432
epoch 171: w = 1.972, loss = 0.00111538
epoch 181: w = 1.973, loss = 0.00105046
epoch 191: w = 1.974, loss = 0.00098932
Prediction after training: f(5) = 9.948


Simple Custom Model (For Linear Regression)

In [18]:
class LinearRegression(nn.Module):

    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        #define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)

model = LinearRegression(input_size, output_size)