In [1]:
import torch
import numpy as np

In [3]:
# requires_grad=True asks autograd to record the operations applied to this
# tensor so that gradients with respect to it can be calculated in later steps.
# The flag is False by default.
x = torch.ones(5, requires_grad=True)

print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


In [14]:
# We set requires_grad=True because, later on, we need the gradient of some
# function with respect to x.
x = torch.randn(3, requires_grad=True)
print(x)

# Forward pass: compute the output y.
# Because x has requires_grad=True, PyTorch automatically creates and stores a
# function for this operation. y exposes it through its "grad_fn" attribute --
# here an AddBackward0 node.
# That function is what backpropagation uses in the backward pass to obtain the
# gradient of y with respect to x.
y = x + 2

print(y)
print(y.grad_fn)  # AddBackward0 object

# z also has a grad_fn attribute, this time MulBackward0.
z = y * y * 2

# After the reduction the gradient function is MeanBackward0.
z = z.mean()

# All that is left to do to obtain the gradients is to call z.backward().
z.backward()  # accumulates the gradient of z w.r.t. x (dz/dx) into x.grad

print(x.grad)  # gradient of z with respect to x

tensor([0.0125, 0.2046, 0.6536], requires_grad=True)
tensor([2.0125, 2.2046, 2.6536], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x0000020B856D5940>
None
None
tensor([2.6833, 2.9394, 3.5381])


  print(z.grad)
  print(z.grad)


In [None]:
# Sometimes you don't want autograd to track EVERY single gradient
# We can prevent autograd from tracking certain tensors
# 3 main ways of doing this:
# x.requires_grad = False
# y = x.detach()
# with torch.no_grad():
#     ...

In [None]:
"""
Whenever we call the .backward() function, the gradient for a tensor such as x is
accumulated (added) into its .grad attribute rather than overwriting what is already there
"""


In [17]:
# An example

weights = torch.ones(4, requires_grad=True)

# Toy training loop
for epoch in range(2):
    model_output = torch.sum(weights * 3)

    # Backward pass: the gradient is accumulated into weights.grad
    model_output.backward()

    # The gradients are now available, so we can read weights.grad
    print(weights.grad)

    # WITHOUT "weights.grad.zero_()" each backward() call would add to the
    # values already stored in weights.grad:
    #   first iteration  -> [3, 3, 3, 3]
    #   second iteration -> [6, 6, 6, 6]
    #   a third iteration would give 9s, and so on
    # That is clearly incorrect: the gradients must be emptied before the next
    # iteration and optimisation step, which is what weights.grad.zero_() does.
    weights.grad.zero_()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])
tensor([3., 3., 3., 3.])
tensor([0., 0., 0., 0.])
