# Gradient descent in pytorch

## 1st the manual implementation

In [2]:
import numpy as np

In [11]:
# f = w * x Lninear equation to be used here
# f = 2 * x Our case w = 2
X = np.array([1,2,3,4], dtype = np.float32)
y = np.array([2,4,6,8], dtype = np.float32)

w = 0.0

# model prediction
def forward(x):
    return w * x

#loss MSE
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()

#gradient
# MSE = 1/N * (w*x - y) ** 2
# dJ/dw = 1/N * 2x * (w*x - y)

def gradient(x, y, y_pred):
    return np.dot(2*x, y_pred - y).mean()

test = forward(5)
print(f'Prediction before training: f(5) = {test:.3f}')

# Training
learning_rate = 0.01
n_iters = 10

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    l = loss(y, y_pred)

    # gradients
    dw = gradient(X, y, y_pred)

    # update weights
    w -= learning_rate * dw

    if epoch % 2 == 0:
        print(f'epoch {epoch}; w = {w:.3f}; loss = {l: .8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')


Prediction before training: f(5) = 0.000
epoch 0; w = 1.200; loss =  30.00000000
epoch 2; w = 1.872; loss =  0.76800019
epoch 4; w = 1.980; loss =  0.01966083
epoch 6; w = 1.997; loss =  0.00050332
epoch 8; w = 1.999; loss =  0.00001288
Prediction after training: f(5) = 9.999


## 2nd Using torch tensors

In [None]:
import torch

In [15]:
# f = w * x Lninear equation to be used here
# f = 2 * x Our case w = 2
X = torch.tensor([1,2,3,4], dtype = torch.float32)
y = torch.tensor([2,4,6,8], dtype = torch.float32)

w = torch.tensor(0.0, dtype = torch.float32, requires_grad = True)

# model prediction
def forward(x):
    return w * x

#loss MSE
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()

# gradient
# MSE = 1/N * (w*x - y) ** 2
# dJ/dw = 1/N * 2x * (w*x - y)

def gradient(x, y, y_pred):
    return np.dot(2*x, y_pred - y).mean()

test = forward(5)
print(f'Prediction before training: f(5) = {test:.3f}')

# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    l = loss(y, y_pred)

    # gradients = backward pass
    l.backward() # dl/ dw

    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad

    # empty gradient
    w.grad.zero_()

    if epoch % 10 == 0:
        print(f'epoch {epoch}; w = {w:.3f}; loss = {l: .8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')


Prediction before training: f(5) = 0.000
epoch 0; w = 0.300; loss =  30.00000000
epoch 10; w = 1.665; loss =  1.16278565
epoch 20; w = 1.934; loss =  0.04506890
epoch 30; w = 1.987; loss =  0.00174685
epoch 40; w = 1.997; loss =  0.00006770
epoch 50; w = 1.999; loss =  0.00000262
epoch 60; w = 2.000; loss =  0.00000010
epoch 70; w = 2.000; loss =  0.00000000
epoch 80; w = 2.000; loss =  0.00000000
epoch 90; w = 2.000; loss =  0.00000000
Prediction after training: f(5) = 10.000


## 3rd Training pipeline  of torch

1) Design model (input, output size, forward pass)
2) Construct loss and optimizer
3) Training loop
    - forward pass: compute prediction
    - backward pass: gradients
    - update weights
    - repeat these steps for sometime

In [21]:
import torch.nn as nn

In [31]:
# f = w * x Lninear equation to be used here
# f = 2 * x Our case w = 2
X = torch.tensor([1,2,3,4], dtype = torch.float32)
y = torch.tensor([2,4,6,8], dtype = torch.float32)

w = torch.tensor(0.0, dtype = torch.float32, requires_grad = True)

# model prediction
def forward(x):
    return w * x

#loss MSE
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()

# gradient
# MSE = 1/N * (w*x - y) ** 2
# dJ/dw = 1/N * 2x * (w*x - y)

def gradient(x, y, y_pred):
    return np.dot(2*x, y_pred - y).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

# Training
learning_rate = 0.001
n_iters = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    # loss
    l = loss(y, y_pred)

    # gradients = backward pass
    l.backward() # dl/ dw

    # update weights
    optimizer.step()

    # empty gradient
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print(f'epoch {epoch+1}; w = {w:.3f}; loss = {l: .8f}')

print(f'Prediction after training: f(5) = {forward(5):.3f}')


Prediction before training: f(5) = 0.000
epoch 1; w = 0.030; loss =  30.00000000
epoch 11; w = 0.306; loss =  22.17409134
epoch 21; w = 0.544; loss =  16.38967896
epoch 31; w = 0.748; loss =  12.11420631
epoch 41; w = 0.924; loss =  8.95405006
epoch 51; w = 1.075; loss =  6.61826515
epoch 61; w = 1.205; loss =  4.89180279
epoch 71; w = 1.316; loss =  3.61570978
epoch 81; w = 1.412; loss =  2.67250252
epoch 91; w = 1.494; loss =  1.97534466
Prediction after training: f(5) = 7.794


## 4th Sabhi chize pytorch se

In [58]:
# f = w * x Lninear equation to be used here
# f = 2 * x Our case w = 2
X = torch.tensor([[1],[2],[3],[4]], dtype = torch.float32)
y = torch.tensor([[2],[4],[6],[8]], dtype = torch.float32)

X_test = torch.tensor([5], dtype=torch.float32)

n_samples, n_features = X.shape

input_size = n_features
output_size = n_features

# model = nn.Linear(input_size, output_size) # this is only a single layer

class LinearRegression(nn.Module):
    
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()

        # define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)

model = LinearRegression(input_size, output_size)
#loss MSE
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()

# gradient
# MSE = 1/N * (w*x - y) ** 2
# dJ/dw = 1/N * 2x * (w*x - y)

def gradient(x, y, y_pred):
    return np.dot(2*x, y_pred - y).mean()

print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

# Training
learning_rate = 0.01
n_iters = 10000

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    # loss
    l = loss(y, y_pred)

    # gradients = backward pass
    l.backward() # dl/ dw

    # update weights
    optimizer.step()

    # empty gradient
    optimizer.zero_grad()

    if epoch % 100 == 0:
        [w,b] = model.parameters()
        print(f'epoch {epoch+1}; w = {w[0][0].item():.3f}; loss = {l: .8f}')

print(f'Prediction after training: f(5) = {model(X_test).item():.3f}')


Prediction before training: f(5) = 4.446
epoch 1; w = 0.854; loss =  7.46600866
epoch 101; w = 1.694; loss =  0.13590723
epoch 201; w = 1.773; loss =  0.07461142
epoch 301; w = 1.832; loss =  0.04096073
epoch 401; w = 1.876; loss =  0.02248699
epoch 501; w = 1.908; loss =  0.01234508
epoch 601; w = 1.932; loss =  0.00677730
epoch 701; w = 1.949; loss =  0.00372064
epoch 801; w = 1.962; loss =  0.00204259
epoch 901; w = 1.972; loss =  0.00112136
epoch 1001; w = 1.979; loss =  0.00061562
epoch 1101; w = 1.985; loss =  0.00033797
epoch 1201; w = 1.989; loss =  0.00018554
epoch 1301; w = 1.992; loss =  0.00010186
epoch 1401; w = 1.994; loss =  0.00005592
epoch 1501; w = 1.995; loss =  0.00003070
epoch 1601; w = 1.997; loss =  0.00001685
epoch 1701; w = 1.997; loss =  0.00000925
epoch 1801; w = 1.998; loss =  0.00000508
epoch 1901; w = 1.999; loss =  0.00000279
epoch 2001; w = 1.999; loss =  0.00000153
epoch 2101; w = 1.999; loss =  0.00000084
epoch 2201; w = 1.999; loss =  0.00000046
epoch