In [None]:
import torch

In [None]:
# The autograd package provides automatic differentiation for all operations on Tensors.
# It is a define-by-run framework: backpropagation is defined by how your code runs, so every single iteration can be different.


# torch.Tensor is the central class. 
# If you set its attribute '.requires_grad' to 'True', it starts to track all operations on it.
# When you finish your computation, you can call '.backward()' and have all the gradients computed automatically.
# The gradient for this tensor will be accumulated into '.grad' attribute.

# To stop a tensor from tracking history, you can call '.detach()' to detach it from the computation history and to prevent future computation from being tracked.

# To prevent tracking history (and the memory it uses), you can wrap the code block in 'with torch.no_grad():'.
# This can be helpful when evaluating a model because the model may have trainable parameters with 'requires_grad = True', but for which we don't need the gradients.

# 'Tensor' and 'Function' are interconnected and build up an acyclic graph (one with no cycles) that encodes a complete history of computation.
# Each tensor has a '.grad_fn' attribute that references a 'Function' that has created the 'Tensor'.

# If you want to calculate the derivatives, you can call '.backward()' on a 'Tensor'.
# If 'Tensor' is a scalar, you don't need to specify any arguments to 'backward()'
# However, if it has more elements, you need to specify a 'gradient' argument that is a tensor of matching shape.

# Leaf tensor of ones whose operations are tracked by autograd.
x = torch.ones(2, 2, requires_grad=True)

# y = x + 2, so every entry is 3; y is a non-leaf tensor produced by an operation.
y = x + 2

# z = 3 * y^2, so every entry is 27.
z = y * y * 3

# Reduce to a single scalar so that backward() needs no arguments.
out = z.mean()

# Because out contains a single scalar, out.backward() is equivalent to out.backward(torch.tensor(1.))
out.backward()
# If out were not a scalar, torch.autograd could not compute the full Jacobian directly; to get the
# vector-Jacobian product instead, pass the vector to backward() as the 'gradient' argument.

# d(out)/dx = (1/4) * 6 * (x + 2) = 4.5 for every entry.
print(x.grad)

# .detach() stops a tensor from tracking history: the rebound x has requires_grad=False and no grad.
x = x.detach()
print(x.requires_grad, x.grad)

# Autograd can also be stopped from tracking history by wrapping the code block in 'with torch.no_grad()'.
# y is used here (not the already-detached x) because y still has requires_grad=True, so the
# False printed below is caused by no_grad itself rather than by the earlier detach.
with torch.no_grad():
    print((y ** 2).requires_grad)

In [None]:
# Draw a random 2x2 tensor, then derive a new tensor 3a / (a - 1) from it.
a = torch.rand(2, 2)
a = torch.div(torch.mul(a, 3), torch.sub(a, 1))

# Tensors do not track gradients unless asked to.
print(a.requires_grad)

# .requires_grad_() flips the requires_grad flag of an existing Tensor in-place (it defaults to True).
a.requires_grad_()
print(a.requires_grad)

# b is the sum of squares of a, i.e. \sum(a \odot a)
b = torch.sum(torch.mul(a, a))

# Backpropagate from the scalar b; the gradient is accumulated into a.grad.
b.backward()
print(a.grad)