In [1]:
import torch

### Autograd

The aurograde package provides automatic differentitation on all aperations on tensores. Generally speaking, torch.autograd is an engine for computing the vector Jacobian product. It computespartial derivatives while applying the chain rule.

In [9]:
x = torch.rand(3, requires_grad=True)

In [10]:
x

tensor([0.6268, 0.5272, 0.8882], requires_grad=True)

In [11]:
y = x+2
print(y.grad_fn)

<AddBackward0 object at 0x74e1955e9a80>


In [12]:
z = y*y*3
print(z)

z = z.mean()
print(z)

tensor([20.7004, 19.1608, 25.0259], grad_fn=<MulBackward0>)
tensor(21.6290, grad_fn=<MeanBackward0>)


In [13]:
z.backward()

print(x.grad) #dz/dx

tensor([5.2536, 5.0545, 5.7765])


In [14]:
#Careful : backward() accumulates the gradient for this tensor into .grad attribute.
#call optimizer.zero_grad()

#### Stop a tensor from trcking history

For example during the training loop when we want to update our weights, or after training during evaluation. These operations should not be part of the gradient computation. To prevent this, we can use : 


In [15]:
a = torch.randn(2,3)
a

tensor([[ 0.1002, -0.1635,  0.4429],
        [ 1.2792, -1.0479, -0.5346]])

In [16]:
b = (a*a).sum()
print(a.requires_grad)
print(b.grad_fn)

False
None


In [17]:
a.requires_grad_(True)
b = (a*a).sum()
print(a.requires_grad)
print(b.grad_fn)

True
<SumBackward0 object at 0x74e1955e9870>


In [18]:
a = torch.randn(2,3,requires_grad=True)
b = a.detach()

### Gradient Descent Approach
Linear regression example

f(x) = w*x + b

In [20]:
X = torch.tensor([1,2,3,4,5,6,7,8],dtype=torch.float32)
Y = torch.tensor([2,4,6,8,10,12,14,16])

w = torch.tensor(0.0,requires_grad=True)

In [21]:
#model output
def forward(x):
    return w*x

#loss = MSE
def loss(y,y_pred):
    return ((y_pred-y)**2).mean()

In [25]:
X_test = 5.0
print(f'Prediction before training : f({X_test}) = {forward(X_test).item():.3f}')

Prediction before training : f(5.0) = 0.000


In [27]:
#Training
lr = 0.01
n_epochs = 100

for epoch in range(n_epochs):
    #predict = forward pass
    y_pred=forward(X)

    #loss
    l = loss(Y,y_pred)

    #calculate gradients = backward pass
    l.backward()

    #update weights
    #w.data = w.data - lr*w.grad
    with torch.no_grad():
        w -= lr*w.grad

    #zero teh gradients after updating
    w.grad.zero_()

    if (epoch+1) % 10 == 0:
        print(f'epoch {epoch+1} : w = {w.item():.3f},loss = {l.item():.3f}')

print(f'Prediction after training :f({X_test}) = {forward(X_test).item():.3f}')

epoch 10 : w = 2.000,loss = 0.000
epoch 20 : w = 2.000,loss = 0.000
epoch 30 : w = 2.000,loss = 0.000
epoch 40 : w = 2.000,loss = 0.000
epoch 50 : w = 2.000,loss = 0.000
epoch 60 : w = 2.000,loss = 0.000
epoch 70 : w = 2.000,loss = 0.000
epoch 80 : w = 2.000,loss = 0.000
epoch 90 : w = 2.000,loss = 0.000
epoch 100 : w = 2.000,loss = 0.000
Prediction after training :f(5.0) = 10.000
