In [32]:
import numpy as np
import torch

# Gradient descent
In this step we implement *gradient descent algorithm*. We will be working with simple linear regression model:  $f = w*x$, where $w_{true} = 2$.

## Using only numpy

In [41]:
# Our data consist of 4 different objects
X_data = np.array([1, 2, 3, 4], dtype=np.float32)
y_data = X_data * 2

In [17]:
# initial parameter
w = np.random.rand(1)
w

array([0.13893278])

define the functions that calculate **loss value** and **gradient**

In [18]:
def loss(y_pred, y_true):
    # MSE
    return ((y_pred - y_true)**2).mean()

def gradient(x, y_pred, y_true):
    #dMSE/dw = 2 * x * (y_pred - y_true) / 2
    return np.dot(x, y_pred-y_true)

define the function that calculate the output of our model

In [19]:
def model_predict(x):
    return w * x

Now everything is ready for train our model and try to find the optimal value for `w`

In [20]:
n_epoches = 10
leaning_rate = 0.01

for epoch in range(n_epoches):
    output = model_predict(X_data)
    # calculate the gradient
    loss_value = loss(output, y_data)
    grad = gradient(X_data, output, y_data)
    
    w = w - leaning_rate * grad
    print(f'epoch № {epoch}: loss = {loss_value}')

print()
print('weights after training:', w)

epoch № 0: loss = 25.976783868344008
epoch № 1: loss = 12.728624095488563
epoch № 2: loss = 6.237025806789397
epoch № 3: loss = 3.056142645326805
epoch № 4: loss = 1.4975098962101339
epoch № 5: loss = 0.7337798491429652
epoch № 6: loss = 0.3595521260800534
epoch № 7: loss = 0.17618054177922593
epoch № 8: loss = 0.08632846547182065
epoch № 9: loss = 0.0423009480811921

weights after training: [1.94742946]


## Using backward and gradient from PyTorch

In [42]:
# our data
X_data = torch.from_numpy(X_data)
y_data = torch.from_numpy(y_data)

# initial weights
weights = torch.rand(1)*5
weights.requires_grad_(True)
weights

tensor([4.6686], requires_grad=True)

In [43]:
y_data

tensor([2., 4., 6., 8.])

In [44]:
def model_prediction(x):
    return weights * x

def loss_f(y_pred, y_true):
    return ((y_pred - y_true)**2).mean()

In [45]:
n_epoches = 100
learning_rate = 0.01

for epoch in range(n_epoches):
    # 1 step: forward pass
    output = model_prediction(X_data)
    
    # 2 step: calculate loss function
    loss_value = loss_f(output, y_data)
    
    # 3 step: calculate gradient
    loss_value.backward()
    
    # 4 step: update our weights
    with torch.no_grad():
        weights -= learning_rate * weights.grad
    
    # 5 step: zero gradient
    weights.grad.zero_()
    
    if (epoch+1) % 10 == 0:
        print(f'epoch № {epoch+1}: loss = {loss_value}\n')
        
print(f'weight = {weights}')

epoch № 10: loss = 2.8652901649475098

epoch № 20: loss = 0.1110573559999466

epoch № 30: loss = 0.004304530099034309

epoch № 40: loss = 0.0001668329641688615

epoch № 50: loss = 6.467465027526487e-06

epoch № 60: loss = 2.514570951461792e-07

epoch № 70: loss = 9.707790127322369e-09

epoch № 80: loss = 3.836930773104541e-10

epoch № 90: loss = 1.5347723092418164e-11

epoch № 100: loss = 3.595346242946107e-12

weight = tensor([2.0000], requires_grad=True)


## Using optimizer, loss, backward and gradient from PyTorch

In [52]:
weights = torch.rand(1)*15
weights.requires_grad_(True)
weights

tensor([8.4871], requires_grad=True)

In [53]:
def model_prediction(x):
    return weights*x

In [54]:
# contstruct loss and optimizer
optimizer = torch.optim.SGD([weights], lr=0.01)
loss = torch.nn.MSELoss()

In [55]:
# training loop
n_epoches = 100

for epoch in range(n_epoches):
    output = model_prediction(X_data)
    loss_value = loss(output, y_data)
    loss_value.backward()
    
    optimizer.step()
    optimizer.zero_grad()
    
    if (epoch+1) % 10 == 0:
        print(f'epoch №{epoch+1}: loss = {loss_value}\n')
print(f'weights =', weights)

epoch №10: loss = 16.93170928955078

epoch №20: loss = 0.656265914440155

epoch №30: loss = 0.025436677038669586

epoch №40: loss = 0.0009859043639153242

epoch №50: loss = 3.820823985734023e-05

epoch №60: loss = 1.481266735936515e-06

epoch №70: loss = 5.710887762688799e-08

epoch №80: loss = 2.143067945326038e-09

epoch №90: loss = 8.355982572538778e-11

epoch №100: loss = 3.595346242946107e-12

weights = tensor([2.0000], requires_grad=True)


## Using only PyTorch packages

In [73]:
# data preparing: each object is independent row
X_data = X_data.reshape(-1, 1)
y_data = y_data.reshape(-1, 1)

y_data

tensor([[2.],
        [4.],
        [6.],
        [8.]])

In [80]:
# define the model
model = torch.nn.Linear(in_features=1, out_features=1, bias=False)
print(next(model.parameters()))

# construct loss and optimizer
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.05)

# training loop
n_epoches = 10

for epoch in range(n_epoches):
    # 1: forward pass
    output = model(X_data)
    # 2: calculate loss function
    loss_value = loss_function(output, y_data)
    
    # 3: calculate the gradient
    loss_value.backward()
    
    # 4: update weights
    optimizer.step()
    
    # 5 zero gradient
    optimizer.zero_grad()
    
    print(f'epoch №{epoch+1}: loss = {loss_value}')
        
print(f'model prediction for x=5: ', model(torch.tensor([5.])).item())

Parameter containing:
tensor([[0.5106]], requires_grad=True)
epoch №1: loss = 16.637157440185547
epoch №2: loss = 1.0398225784301758
epoch №3: loss = 0.06498882919549942
epoch №4: loss = 0.004061833024024963
epoch №5: loss = 0.0002538588596507907
epoch №6: loss = 1.5866437024669722e-05
epoch №7: loss = 9.916036560753128e-07
epoch №8: loss = 6.191835666413681e-08
epoch №9: loss = 3.8395029378079926e-09
epoch №10: loss = 2.455635694786906e-10
model prediction for x=5:  9.999992370605469
