# PyTorch - Basics

In [2]:
import torch
import numpy as np

## Create tensors and convert from numpy

In [11]:
# Basic tensors built from Python lists: a 1-D vector and a 3x2 matrix.
# Both share the same options: float64 values, gradient tracking on, stored on the CPU.
_tensor_opts = dict(dtype=torch.double, requires_grad=True, device='cpu')

tsr = torch.tensor([2, 2, 1], **_tensor_opts)
tsr1 = torch.tensor([[2, 1], [2, 1], [1, 1]], **_tensor_opts)

for _tensor in (tsr, tsr1):
    print(_tensor)

tensor([2., 2., 1.], dtype=torch.float64, requires_grad=True)
tensor([[2., 1.],
        [2., 1.],
        [1., 1.]], dtype=torch.float64, requires_grad=True)


In [18]:
# Tensor from a NumPy array: the tensor keeps the array's values and integer dtype.
array = np.array([[1,2],[2,2]])
tsr3 = torch.from_numpy(array)
# Tensor.to() is NOT in-place: it returns the tensor on the requested device,
# so the result has to be assigned back for the call to have any effect.
tsr3 = tsr3.to('cpu')
print(tsr3)

tensor([[1, 2],
        [2, 2]], dtype=torch.int32)


## Check if GPU is available

In [13]:
# Report whether this PyTorch installation can see a CUDA device
print('GPU available' if torch.cuda.is_available() else 'Only CPU available')

Only CPU available


## Backward propagation

Backward propagation computes the derivative of the model function with respect to its inputs by applying the chain rule.<br>
`a = (b*c) * d`<br>
`da/db = d((b*c)*d)/db = c*d`<br>
The resulting gradient is then used to optimize the model.

In [28]:
# Autograd only tracks tensors created with requires_grad=True,
# so the flag is required to be able to read x.grad after backward()
x = torch.tensor(data=[1], dtype=torch.double, requires_grad=True)
z = x*2  # dz/dx = 2

# z holds a single element, so backward() can be called without arguments
z.backward()

print('x backward: ', x.grad)

x backward:  tensor([2.], dtype=torch.float64)


In [31]:
# Watch out when backward() runs inside a loop: every call ADDS to x.grad
# instead of replacing it, so the gradient grows with each pass
x = torch.tensor([1, 2], dtype=torch.double, requires_grad=True)

for step in range(3):
    z = torch.sum(x * 2)
    z.backward()
    print(x.grad)

tensor([2., 2.], dtype=torch.float64)
tensor([4., 4.], dtype=torch.float64)
tensor([6., 6.], dtype=torch.float64)


In [33]:
# To avoid the accumulation, clear the stored gradient at the end of every iteration
x = torch.tensor([1, 2], dtype=torch.double, requires_grad=True)

for step in range(3):
    z = torch.sum(x * 2)
    z.backward()
    print(x.grad)
    # zero_() resets the gradient in place so the next pass starts from zero
    x.grad.zero_()

tensor([2., 2.], dtype=torch.float64)
tensor([2., 2.], dtype=torch.float64)
tensor([2., 2.], dtype=torch.float64)


In [100]:
# The gradient is used to optimize the weights and minimize the loss function
# e.g. let's say the function is y = 2 * x and we want to recover the weight w = 2

x = torch.from_numpy(np.array([1.0, 2.0, 3.0]))
y = torch.from_numpy(np.array([2.0, 4.0, 6.0]))
w = torch.tensor(data=0.0, dtype=torch.double, requires_grad=True)

def MSE():
    """Return the mean squared error between the targets y and the predictions x*w.

    Reads the module-level tensors x, y and w at call time, so the result
    always reflects the current value of the weight.
    """
    return ((y - x*w)**2).mean()

rate = 0.1  # learning rate: step size of each weight update

for epoch in range(5):
    w_mse = MSE()
    w_mse.backward()  # fills w.grad with d(MSE)/dw

    # Printed before the update: the loss together with the weight that produced it
    print('iter {} loss: {:.5f}, w: {:.5f}'.format(epoch, w_mse.item(), w.item()))

    # Apply the gradient step without recording it in the autograd graph
    with torch.no_grad():
        w -= rate * w.grad
    # backward() accumulates, so clear the gradient before the next pass
    w.grad.zero_()


print('final loss: {:.5f}, w: {:.5f}'.format(MSE().item(), w.item()))

iter 0 loss: 18.66667, w: 0.00000
iter 1 loss: 0.08296, w: 1.86667
iter 2 loss: 0.00037, w: 1.99111
iter 3 loss: 0.00000, w: 1.99941
iter 4 loss: 0.00000, w: 1.99996
final loss: 0.00000, w: 2.00000


## Optimization

It is important to learn the base concepts. In practice, it is possible to use the built-in functions to optimize and manage the backward and forward passes.

In [121]:
# e.g. let's say the function is y = 2 * x and we want to recover the weight w = 2

x = torch.from_numpy(np.array([1.0, 2.0, 3.0]))
y = torch.from_numpy(np.array([2.0, 4.0, 6.0]))
w = torch.tensor(data=0.0, dtype=torch.double, requires_grad=True)

rate = 0.1  # learning rate handed to the optimizer
optimizer = torch.optim.SGD([w], lr=rate)  # updates w from w.grad
loss = torch.nn.MSELoss()  # built-in mean squared error criterion

for epoch in range(5):

    w_mse = loss(x*w, y)
    w_mse.backward()

    # step() applies the gradient update to w; zero_grad() clears the gradient for the next pass
    optimizer.step()
    optimizer.zero_grad()

    # The loss was computed before step(), while w is shown after the update
    print('iter {} loss: {:.5f}, w: {:.5f}'.format(epoch, w_mse.item(), w.item()))


# Use this cell's own criterion so it does not depend on a function defined in another cell
print('final loss: {:.5f}, w: {:.5f}'.format(loss(x*w, y).item(), w.item()))

iter 0 loss: 18.66667, w: 1.86667
iter 1 loss: 0.08296, w: 1.99111
iter 2 loss: 0.00037, w: 1.99941
iter 3 loss: 0.00000, w: 1.99996
iter 4 loss: 0.00000, w: 2.00000
final loss: 0.00000, w: 2.00000


In [119]:
# Element-wise product of the inputs and the current weight, i.e. the model's predictions.
# NOTE(review): the saved output of this cell appears to come from an earlier run
# (it corresponds to w ≈ 0.31, not the trained w ≈ 2) — re-run after training to refresh it.
x*w

tensor([0.3098, 0.6195, 0.9293], dtype=torch.float64, grad_fn=<MulBackward0>)

In [120]:
# Target values, shown for comparison with the predictions x*w
y

tensor([2., 4., 6.], dtype=torch.float64)