#  Gradient Descent

Using Numpy: Doing all manually.

In [1]:
# Let's first manually calculate the gradient for linear function.

In [2]:
import numpy as np

In [13]:
# Initialization: liear regression  [ y = 2*x ]
x = np.array([1,2,3,4],dtype = np.float32)
y = np.array([2,4,6,8],dtype = np.float32)
# weight
w = 0.0

In [14]:
# Model prediction
def forward(x):
    return w*x

#Loss  = MSE (for linear regression)
def loss(y,y_pred):
    return ((y_pred-y)**2).mean()

# gradient of the loss function
# MSE = 1/N(w*x - y)**2
# d(loss)/dw = 1/N.(2x).(w*x-y)
def gradient(x,y,y_pred):
    return np.dot(2*x, (y_pred-y)).mean()

In [15]:
# f(5) = w*x = 0*5 ---> 0
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 10

# Prediction
for epoch in range(n_iters):
    # forward pass
    y_pred = forward(x)
    # loss
    l = loss(y,y_pred)
    # gradients
    dw = gradient(x,y,y_pred)
    # update the gradient.
    w -= learning_rate * dw
    
    if epoch%1==0:
        print(f'epoch: {epoch+1}: weight = {w:.3f},loss={l:.8f}')
        

# Final prediction
print(f'Prediction after training:f(5) = {forward(5):0.5f}')
        


Prediction before training: f(5) = 0.000
epoch: 1: weight = 1.200,loss=30.00000000
epoch: 2: weight = 1.680,loss=4.79999924
epoch: 3: weight = 1.872,loss=0.76800019
epoch: 4: weight = 1.949,loss=0.12288000
epoch: 5: weight = 1.980,loss=0.01966083
epoch: 6: weight = 1.992,loss=0.00314574
epoch: 7: weight = 1.997,loss=0.00050331
epoch: 8: weight = 1.999,loss=0.00008053
epoch: 9: weight = 1.999,loss=0.00001288
epoch: 10: weight = 2.000,loss=0.00000206
Prediction after training:f(5) = 9.99895


We see that the weights are getting updataed and loss is decreasing.
Final output ideally should be 10, but it is almost around that
And for higher iterations it will become more accurate.

# Using Torch

In [16]:
import torch

In [42]:
# Initialization: liear regression  [ y = 2*x ]
x = torch.tensor([1,2,3,4],dtype = torch.float32)
y = torch.tensor([2,4,6,8],dtype = torch.float32)
# weight
w = torch.tensor(0.0, requires_grad = True)

In [43]:
# Model prediction
def forward(x):
    return w*x

#Loss  = MSE (for linear regression)
def loss(y,y_pred):
    return ((y_pred-y)**2).mean()

# gradient of the loss function
# MSE = 1/N(w*x - y)**2
# d(loss)/dw = 1/N.(2x).(w*x-y)
def gradient(x,y,y_pred):
    return torch.dot(2*x, (y_pred-y)).mean()

In [44]:
# f(5) = w*x = 0*5 ---> 0
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.01
n_iters = 20

# Prediction
for epoch in range(n_iters):
    # forward pass
    y_pred = forward(x)
    
    # loss
    l = loss(y,y_pred)
    
    # backward prop: dl/dw
    l.backward()
    # update  the gradient.
    with torch.no_grad(): # w.grad should be free from prev iter.
        w -= learning_rate * w.grad
    
    # zero gradient: w.grad = 0
    w.grad.zero_() 
    if epoch%2==0:
        print(f'epoch: {epoch+1}: weight = {w:.3f},loss={l:.8f}')
        

# Final prediction
print(f'Prediction after training:f(5) = {forward(5):0.5f}')
        


Prediction before training: f(5) = 0.000
epoch: 1: weight = 0.300,loss=30.00000000
epoch: 3: weight = 0.772,loss=15.66018772
epoch: 5: weight = 1.113,loss=8.17471695
epoch: 7: weight = 1.359,loss=4.26725292
epoch: 9: weight = 1.537,loss=2.22753215
epoch: 11: weight = 1.665,loss=1.16278565
epoch: 13: weight = 1.758,loss=0.60698116
epoch: 15: weight = 1.825,loss=0.31684780
epoch: 17: weight = 1.874,loss=0.16539653
epoch: 19: weight = 1.909,loss=0.08633806
Prediction after training:f(5) = 9.61241


# Training Pipeline

Steps we need to follow:
1. Design model(input,output_size, forward pass)
2. Construct loss and optimizer
3. Training Loop
   - forward Pass: compute prediction
   - backward pass: gradient
   - update weights

In [45]:
import torch.nn as nn

In [47]:
# Initialization: liear regression  [ y = 2*x ]
x = torch.tensor([1,2,3,4],dtype = torch.float32)
y = torch.tensor([2,4,6,8],dtype = torch.float32)
# weight
w = torch.tensor(0.0, requires_grad = True)

In [48]:
# Model prediction
learning_rate = 0.01
n_iters = 20

def forward(x):
    return w*x

#Loss callback  = MSE (for linear regression)
loss = nn.MSELoss()

# calling optimizer: SGD
optimizer = torch.optim.SGD([w],lr = learning_rate)

In [49]:
# f(5) = w*x = 0*5 ---> 0
print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training

# Prediction
for epoch in range(n_iters):
    # forward pass
    y_pred = forward(x)
    
    # loss
    l = loss(y,y_pred)
    
    # backward prop: dl/dw
    l.backward()
    # update weights:
    optimizer.step()
    
    
    # zero gradient: w.grad = 0
    optimizer.zero_grad() 
    if epoch%2==0:
        print(f'epoch: {epoch+1}: weight = {w:.3f},loss={l:.8f}')
        

# Final prediction
print(f'Prediction after training:f(5) = {forward(5):0.5f}')
        


Prediction before training: f(5) = 0.000
epoch: 1: weight = 0.300,loss=30.00000000
epoch: 3: weight = 0.772,loss=15.66018772
epoch: 5: weight = 1.113,loss=8.17471695
epoch: 7: weight = 1.359,loss=4.26725292
epoch: 9: weight = 1.537,loss=2.22753215
epoch: 11: weight = 1.665,loss=1.16278565
epoch: 13: weight = 1.758,loss=0.60698116
epoch: 15: weight = 1.825,loss=0.31684780
epoch: 17: weight = 1.874,loss=0.16539653
epoch: 19: weight = 1.909,loss=0.08633806
Prediction after training:f(5) = 9.61241


In [50]:
# Now let's replace manually implemented forward method with PyTorch method

In [88]:
# Initialization: liear regression : 2-d tensor now
x = torch.tensor([[1],[2],[3],[4]],dtype = torch.float32)
y = torch.tensor([[2],[4],[6],[8]],dtype = torch.float32)

# size of input, features
n_samples, n_features = x.shape # 4*1
print(n_samples,n_features)
input_size = n_features
output_size = n_features

4 1


In [None]:
# weight: we don't really need to define weights.
# Model is Linear regression model
model = nn.Linear(input_size,output_size)

In [90]:
x_test = torch.tensor([5.],dtype = torch.float32)
print(f'Prediction before training: f(5) = {model(x_test).item():.3f}')

Prediction before training: f(5) = 0.989


In [91]:
# Model prediction
learning_rate = 0.01
n_iters = 100


#Loss callback  = MSE (for linear regression)
loss = nn.MSELoss()
# calling optimizer: SGD
optimizer = torch.optim.SGD(model.parameters(),lr = learning_rate)

In [92]:
# Prediction
for epoch in range(n_iters):
    # forward pass
    y_pred = model(x)
    
    # loss
    l = loss(y,y_pred)
    
    # backward prop: dl/dw
    l.backward()
    # update weights:
    optimizer.step()
    
    
    # zero gradient: w.grad = 0
    optimizer.zero_grad() 
    if epoch%10==0:
        # unpack weight and bias
        [w,b] = model.parameters()
        print(f'epoch: {epoch+1}: w = {w[0][0]} ,l={l:.8f}')
        

# Final prediction
print(f'Prediction after training:f(5) =  {model(x_test).item():.3f}')
        


epoch: 1: w = 0.30486929416656494 ,l=21.84905815
epoch: 11: w = 1.3814506530761719 ,l=0.80809522
epoch: 21: w = 1.5648349523544312 ,l=0.24958242
epoch: 31: w = 1.6042473316192627 ,l=0.22182210
epoch: 41: w = 1.6202094554901123 ,l=0.20856853
epoch: 51: w = 1.632115125656128 ,l=0.19641997
epoch: 61: w = 1.6430925130844116 ,l=0.18498704
epoch: 71: w = 1.6536530256271362 ,l=0.17421982
epoch: 81: w = 1.663886547088623 ,l=0.16407934
epoch: 91: w = 1.6738152503967285 ,l=0.15452912
Prediction after training:f(5) =  9.346


In [87]:
# Doing same using our custom Layer.

In [89]:
class LinearRegression(nn.Module):
    
    def __init__(self,input_dim,output_dim):
        super(LinearRegression,self).__init__()
        #define layers
        self.lin = nn.Linear(input_dim, output_dim)
        
    def forward(self,x):
        return self.lin(x)
model  = LinearRegression(input_size,output_size)

In [93]:
# WE ran the above custom function while commenting previously defined model.
# we got almost the same output.