In [None]:
# Autograd - Calculates gradient

In [1]:
 import torch

In [2]:
x = torch.randn(3)
print(x)

tensor([-0.8326,  1.2964, -1.8462])


In [None]:
# if later we want to calculate the gradients with respect to x: add the argument requires_grad=True

In [3]:
x = torch.randn(3, requires_grad=True)
print(x)
y=x+2
print(y)

tensor([-2.4848, -0.3108, -0.4682], requires_grad=True)
tensor([-0.4848,  1.6892,  1.5318], grad_fn=<AddBackward0>)


In [18]:
z = y*y*2
z = z.mean()
print(z) # z is a scaler value after the mean

tensor(3.6232, grad_fn=<MeanBackward0>)


In [19]:
z.backward() #dz/dx
print(x.grad)

tensor([-3.2321, 11.2615, 10.2118])


In [None]:
# One thing we should know is, in the background, this will create a vector jacobian product to get the 
# gradient... Jacobian matrix with partial derivatives multiplied by gradient vector to get final 
# gradient... also known as chain rule

In [21]:
z = y*y*2
v = torch.tensor([0.1,1.0,0.001],dtype=torch.float32)
z.backward(v) #dz/dx
print(x.grad)

tensor([-3.4260, 18.0185, 10.2179])


Prevent pytorch from tracking history of gradient f of n (fn)... 3 options:

In [None]:
# x.requires_grad_(False)
# x.detach() # creates new tensor that doesn't require the gradient
# with torch.no_grad():

In [22]:
x.requires_grad_(False)
print(x) #Doesn't have the requires_grad function

tensor([-2.4848, -0.3108, -0.4682])


In [23]:
y = x.detach()
print(y) #Again, doesn't have requires_grad

tensor([-2.4848, -0.3108, -0.4682])


In [25]:
with torch.no_grad():
    y = x + 2
    print(y) #Doesn't have gradient function attribute

tensor([-0.4848,  1.6892,  1.5318])


One thing to note: Whenever we call the backward function, the gradient of this tensor will be accumulated into the .grad attribute. values will be add up.. BE CAREFUL

In [29]:
weights = torch.ones(4,requires_grad=True)
for epoch in range(3): #Change the range and you'll see the accumulation
    model_output = (weights*3).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_() # Empty the gradient to fix... Important for training steps

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


Optimizers

In [None]:
optimizer = torch.optim.SGD(weights, lr=0.01)
optimizer.step()
optimizer.zero_grad()

Important recap:

In [None]:
weights = torch.ones(4, requires_grad=True)
z.backward()
weights.grad.zero_()