# AutoGrad

The Autograd package provides automatic differentiation for all operations on Tensors.

`torch.autograd` is an engine for computing the vector-Jacobian product. It computes partial derivatives while applying the chain rule.

In [1]:
import torch

In [16]:
# requires_grad=True -> autograd tracks all the operations on the tensor
x = torch.rand(3, requires_grad=True)
# y is the result of a tracked operation, so it will have a grad_fn attribute
y = x + 2
y

tensor([2.9317, 2.2573, 2.5848], grad_fn=<AddBackward0>)

In [17]:
# Do more operations on y
z = y * y * 3  # element-wise 3 * y**2
print(z)
z = z.mean()  # reduce to a single scalar so that .backward() needs no argument
print(z)

tensor([25.7853, 15.2869, 20.0430], grad_fn=<MulBackward0>)
tensor(20.3717, grad_fn=<MeanBackward0>)


In [18]:
# When we finish our computation we can call .backward() and have all the gradients computed automatically

print(x.grad)  # None: no backward pass has been run yet
z.backward()
print(x.grad) # dz/dx = 2 * y here, since z = mean(3 * y**2) over 3 elements

# Careful: backward() accumulates the gradients into .grad for this tensor, so in each epoch
# we need to clear the gradients, e.g. with optimizer.zero_grad() or x.grad.zero_()

None
tensor([5.8635, 4.5147, 5.1695])


## Stop a tensor from tracking history

Sometimes we don't want to compute the gradient, so we can do:
- `x.requires_grad_(False)`
- `x.detach()`
- `with torch.no_grad()`

In [22]:
a = torch.ones(2, 3, requires_grad=True)
print(a.requires_grad)

# requires_grad_() changes the flag of `a` itself, in place
a.requires_grad_(False)
# detach() returns a tensor that does not require grad
b = a.detach()
print(a.requires_grad)
print(b.requires_grad)

True
False
False


In [21]:
a = torch.ones(2, 3, requires_grad=True)
# detach() leaves `a` untouched; only the returned tensor `b` stops requiring grad
b = a.detach()
print(a.requires_grad)
print(b.requires_grad)

True
False


In [20]:
a = torch.rand(2, 3, requires_grad=True)
print(a.requires_grad)
# Inside no_grad() the results of operations do not require grad,
# while `a` itself keeps its requires_grad flag
with torch.no_grad():
    b = a ** 2
    print(a.requires_grad)
    print(b.requires_grad)

True
True
False


## Gradient Descent AutoGrad

Linear Regression Example:

$f(\mathbf{x}) = \mathbf{w}\cdot\mathbf{x} + b$

In [60]:
def fit(x, y, epochs, lr):
    """Fit the line ``w * x + b`` to ``y`` with plain gradient descent.

    The weight and the bias each start as a one-element tensor drawn from
    ``torch.rand`` and are updated in place using the gradients autograd
    computes for the mean squared error.

    Args:
        x: Input tensor.
        y: Target tensor, compared element-wise against ``w * x + b``.
        epochs: Number of gradient-descent steps to run.
        lr: Learning rate that scales each gradient step.

    Returns:
        The tuple ``(w, b)`` of fitted parameters; both still require grad.
    """
    # Draw the weight before the bias so the random stream is consumed in a fixed order.
    w = torch.rand(1, requires_grad=True)
    b = torch.rand(1, requires_grad=True)
    params = (w, b)

    for epoch in range(epochs):
        # Forward pass: residual of the linear prediction, then MSE.
        residual = w * x + b - y
        loss = (residual ** 2).mean()

        # Backward pass fills p.grad with dloss/dp for every parameter.
        loss.backward()

        # The update itself must not be recorded in the autograd graph.
        with torch.no_grad():
            for p in params:
                p.sub_(lr * p.grad)

        # backward() accumulates into .grad, so reset it before the next step.
        for p in params:
            p.grad.zero_()

        if not epoch % 100:
            print(f'Epoch: {epoch}, loss:{loss.item():.3f}, weights: {w.item():.3f}, bias:{b.item():.3f}')

    return w, b

In [71]:
x = torch.arange(1, 7) # x = [1, 2, 3, 4, 5, 6] (the end value 7 is excluded)
y = torch.arange(2, 14, 2) # y = [2, 4, 6, 8, 10, 12], i.e. y = 2 * x
# Run 1000 epochs of gradient descent with a learning rate of 0.03
w, b = fit(x, y, 1000, 0.03)

Epoch: 0, loss:15.169, weights: 1.772, bias:0.801
Epoch: 100, loss:0.014, weights: 1.937, bias:0.269
Epoch: 200, loss:0.002, weights: 1.979, bias:0.090
Epoch: 300, loss:0.000, weights: 1.993, bias:0.030
Epoch: 400, loss:0.000, weights: 1.998, bias:0.010
Epoch: 500, loss:0.000, weights: 1.999, bias:0.003
Epoch: 600, loss:0.000, weights: 2.000, bias:0.001
Epoch: 700, loss:0.000, weights: 2.000, bias:0.000
Epoch: 800, loss:0.000, weights: 2.000, bias:0.000
Epoch: 900, loss:0.000, weights: 2.000, bias:0.000


In [72]:
# Learned weight, learned bias, and the fitted line's prediction for x = 4
print(f'{w.item():.2f}, {b.item():.2f}, {(4*w+b).item():.2f}')

2.00, 0.00, 8.00
