In [1]:
# PyTorch's `torch.autograd` automatically computes gradients for us, so we can use them for back-propagation


In [2]:
import torch

# Toy single-layer setup: 5 input features mapped to 3 output logits.
x = torch.ones(5)   # input tensor
y = torch.zeros(3)  # expected output

# requires_grad=True asks autograd to track these parameters, which is what
# allows us to compute gradients with respect to them later
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

# forward pass: affine map, then the loss evaluated on the raw logits
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

In [3]:
# Each tensor produced by a tracked operation exposes, via grad_fn, the
# backward function that autograd recorded for that operation.
print(
    f"Gradient function for z = {z.grad_fn}",
    f"Gradient function for loss = {loss.grad_fn}",
    sep="\n",
)

Gradient function for z = <AddBackward0 object at 0x7df4513aed10>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7df451408880>


In [4]:
# Back-propagate from the loss; autograd accumulates the resulting gradients
# in the .grad attribute of the tracked tensors w and b.
loss.backward()
print(w.grad, b.grad, sep="\n")

tensor([[0.1282, 0.0171, 0.0993],
        [0.1282, 0.0171, 0.0993],
        [0.1282, 0.0171, 0.0993],
        [0.1282, 0.0171, 0.0993],
        [0.1282, 0.0171, 0.0993]])
tensor([0.1282, 0.0171, 0.0993])


In [5]:
# Computed normally, the result is tracked because w and b require gradients.
z = x @ w + b
print(z.requires_grad)

# torch.no_grad() prevents gradient tracking for everything computed inside
# the block, which is more efficient; useful when we do not want
# back-propagation for a while.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

# Calling the detach() method on a tensor can be used to do the same thing.

True
False


In [None]:
# autograd records the data (tensors) and every operation executed on them in a DAG.
# In this DAG, the leaves are the input tensors and the roots are the output tensors;
# tracing the graph from the roots to the leaves lets it compute gradients with the chain rule.

# In the forward pass, autograd runs the requested operation and maintains that operation's gradient function in the DAG.
# In backward(), autograd computes the gradients for each input and accumulates them in the respective tensor,
# using the chain rule to propagate gradients all the way to the leaf tensors.

# DAGs in PyTorch are dynamic: the graph is recreated from scratch after each .backward() call
