### Autograd

Used in the backpropogation step in the NN 

**Date:** 28/10/2021  
**Author:** Murad Popattia

In [22]:
import torch

In [23]:
a = torch.tensor([5.], requires_grad=True)
b = torch.tensor([5.], requires_grad=True)

In [24]:
a,b

(tensor([5.], requires_grad=True), tensor([5.], requires_grad=True))

In [25]:
y = a ** 3 - b ** 2

In [26]:
# For computing gradients we would do
# dy / da and dy / db
a.grad, b.grad

(None, None)

In [27]:
# Now we would compute gradient over y
y.backward()

In [28]:
# now we have values for individual values done y AUTOGRAD
a.grad, b.grad

(tensor([75.]), tensor([-10.]))

### Simulating a neural network

In [75]:
# n_features x n_units -> n_units:1 in this case as we have a single output
W = torch.rand(10,1, requires_grad = True) 

# n_samples x n_features
x = torch.rand(1, 10, requires_grad = True) 

# n_features x 1
b = torch.rand(1, 1, requires_grad = True) 

In [76]:
# Wx + b
output = x.mm(W) + b
print(output.shape)

torch.Size([1, 1])


In [77]:
# forward prop
loss = 1 - output

In [78]:
loss, output

(tensor([[-2.4505]], grad_fn=<RsubBackward1>),
 tensor([[3.4505]], grad_fn=<AddBackward0>))

We see there are associated grad functions with them

In [79]:
print(W.grad)

None


In [80]:
# Now we backprop
loss.backward()

In [81]:
# checking gradients
W.grad

tensor([[-0.7254],
        [-0.6594],
        [-0.4947],
        [-0.2016],
        [-0.7754],
        [-0.0114],
        [-0.8516],
        [-0.6948],
        [-0.8337],
        [-0.1812]])

In [82]:
# Now update the W
lr = 0.001

# we don't calculate grad here, just updation
# this helps avoid unecessary computation
with torch.no_grad():
    W = W - lr * W.grad.data
    
W

tensor([[0.8652],
        [0.5214],
        [0.2669],
        [0.7209],
        [0.4014],
        [0.3441],
        [0.1958],
        [0.6894],
        [0.6468],
        [0.5280]])