In [None]:
import torch

In [None]:
# With requires_grad=True, autograd records how each tensor gets produced.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)
# Both x and y are tracked, so gradients can be computed with respect to either of them.
z = torch.add(x, y)
print(z)
# z's grad_fn records that it is the result of adding x and y (it was not, say, read in from a file).
print(z.grad_fn)
# Going one step further: reducing z to a scalar yields a tensor with its own grad_fn.
s = torch.sum(z)
print(s)
print(s.grad_fn)

In [None]:
# Backpropagating from s computes the gradients of s with respect to the leaf
# tensors x and y (this cell uses s, x, y and z from the previous cell).
s.backward()
print(x.grad)  # ds/dx
print(y.grad)  # ds/dy
# z is the result of an operation (a non-leaf tensor), so reading its .grad
# here produces a warning instead of a gradient.
print(z.grad)

`print(z.grad)` produces a warning. By default, PyTorch only populates `.grad` for leaf tensors (tensors that aren't the result of an operation), which are x and y in our example. To have `.grad` populated for a non-leaf tensor like z as well, call its `.retain_grad()` method before calling `backward()`.

In [None]:
# Tensors are created with 'requires_grad=False' unless you ask otherwise.
x = torch.randn(2, 2)
y = torch.randn(2, 2)
print(x.requires_grad, y.requires_grad)
# Neither operand is tracked, so their sum has no grad_fn and you can't backprop through it.
z = torch.add(x, y)
print(z.grad_fn)
# requires_grad_() is another way to turn tracking on: it flips the flag in place.
x.requires_grad_(True)
y.requires_grad_(True)
# Recomputed from tracked operands, z now holds enough information to compute gradients.
z = torch.add(x, y)
print(z.grad_fn)
# The output of an operation requires grad as soon as any of its inputs does.
print(z.requires_grad)

# z.detach() returns a tensor that shares the same storage as z but has forgotten
# its computation history: it knows nothing about how it was computed.
new_z = z.detach()
print(new_z.grad_fn)

# Autograd can also be told to stop tracking history. This is useful in transfer
# learning: when part of a network is frozen we don't want to update its weights,
# only those of specific layers.
print(x.requires_grad)
print(torch.add(x, 10).requires_grad)

# Inside a torch.no_grad() block, results are not tracked even though x requires grad.
with torch.no_grad():
    print(torch.add(x, 10).requires_grad)

In PyTorch, the grad_fn attribute of a tensor represents the function that created that tensor. This attribute is None for tensors that were not the result of a computation that requires gradient tracking.

Let's understand it in an easier way. Imagine you have two numbers, one called x and another called y. You can add them, subtract them, or do other math with them.

When you add x and y to get a new number, let's call it z, it's like saying 2 + 3 = 5. You know the answer is 5, but you don't remember that you added 2 and 3 to get there.

In PyTorch, if you set requires_grad=False for x and y, it's like saying, "I don't care how 2 and 3 made 5; just give me the result 5." So, PyTorch doesn't remember how z was created, and z.grad_fn is None.

But if you set requires_grad=True for x and y, it's like saying, "Hey, I want to know not just the result but also how 2 and 3 made 5. Keep track of that for me." PyTorch will remember the math (2 + 3) and can tell you how changing 2 or 3 would affect the result, which is useful for things like training machine learning models.

In [None]:
# One last example, walked through end to end.
x = torch.ones(2, 2, requires_grad=True)
print(x)
y = torch.add(x, 2)
print(y)
print(y.grad_fn)
z = 3 * (y * y)
out = torch.mean(z)
print('\nprinting z and out:')
print(z, out)
# out = mean(3 * (x + 2)^2) over four entries, so d(out)/dx = 6 * (x + 2) / 4 = 4.5 at x = 1.
out.backward()
print(x.grad)