# PyTorch Tutorial 03 - Gradient Calculation With Autograd

In [2]:
import torch

In [6]:
# Create a 3-element random tensor; requires_grad=True asks autograd to track the operations applied to it
x = torch.randn(3, requires_grad=True)
x

tensor([-0.9960, -0.5469,  0.6769], requires_grad=True)

In [8]:
# y is produced by an operation on the tracked tensor x, so it carries a grad_fn that backward() will follow
y = x + 2
y

tensor([1.0040, 1.4531, 2.6769], grad_fn=<AddBackward0>)

In [10]:
# Element-wise computation: z_i = 2 * y_i**2, so z still holds one value per element of x
z = y * y * 2
z

tensor([ 2.0162,  4.2231, 14.3316], grad_fn=<MulBackward0>)

In [11]:
# Reduce z to a single scalar so that backward() can later be called without arguments
z = z.mean()
z

tensor(6.8570, grad_fn=<MeanBackward0>)

In [14]:
# To calculate gradients, the only thing to do is to call .backward() on the final variable
z.backward()   # dz/dx

# The resulting gradients are stored in the x.grad attribute (an attribute, not a method call)
print(x.grad)

tensor([1.3387, 1.9375, 3.5692])


In [15]:
# Display x again: its values are the same as before backward(); only x.grad was filled in
x

tensor([-0.9960, -0.5469,  0.6769], requires_grad=True)

In [16]:
## NOTE
# .backward() computes a vector-Jacobian product to obtain the gradients.
# For this to work, one of the following must hold:
# 1) The final variable (in our case z) is a scalar, i.e. it holds exactly 1 value. Previously, z had several values,
#    but we reduced it to 1 value by applying z.mean().
# OR
# 2) We pass backward() a vector with the same shape as z; it is multiplied with the Jacobian (vector-Jacobian product).
#    See the autograd documentation for more detail.
# In that case we can run it like this:

# z = y * y * 2
# v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32) # it can be any vector, as long as it has the same shape as z
# z.backward(v)

In [None]:
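## To stop tracking gradients (i.e. drop requires_grad=True), we have 3 options:
# 1) x.requires_grad_(False)   -> modifies x in place
# 2) x.detach()                -> returns a new tensor that is not tracked
# 3) with torch.no_grad():
#       processing code

# The 3rd option turns off gradient tracking only temporarily, inside the with-block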


In [17]:
# Option 1: requires_grad_(False) works in place (trailing underscore), so x itself stops being tracked
x = torch.randn(3, requires_grad=True)
print(x)

x.requires_grad_(False)
print(x)

tensor([0.5782, 0.4571, 0.5383], requires_grad=True)
tensor([0.5782, 0.4571, 0.5383])


In [18]:
# Option 2: detach() returns a tensor y with the same values that is not tracked; x itself is not modified by the call
x = torch.randn(3, requires_grad=True)
print(x)

y = x.detach()
print(y)

tensor([-0.7298, -0.1294,  1.0051], requires_grad=True)
tensor([-0.7298, -0.1294,  1.0051])


In [19]:
# Option 3: operations executed inside torch.no_grad() are not recorded, so y has no grad_fn even though x is tracked
x = torch.randn(3, requires_grad=True)
print(x)

with torch.no_grad():
    y = x + 2
    print(y)

tensor([-2.0934, -0.0456, -1.0464], requires_grad=True)
tensor([-0.0934,  1.9544,  0.9536])


In [24]:
# Leaf tensor of ones; autograd stores its gradient in weights.grad.
weights = torch.ones(4, requires_grad=True)

# A single pass: d(sum(3 * w)) / dw is 3 for every element.
for epoch in range(1):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])


In [25]:
# Leaf tensor of ones; autograd stores its gradient in weights.grad.
weights = torch.ones(4, requires_grad=True)

# Two passes with no reset in between: the second backward() adds onto the
# gradient left behind by the first one.
for epoch in range(2):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])


In [26]:
# Start again from a fresh leaf tensor of ones.
weights = torch.ones(4, requires_grad=True)

# Three passes, still without a reset between them.
for epoch in range(3):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)


# Each backward() call adds the new gradients onto weights.grad instead of replacing them.
# Before the next epoch / step they have to be cleared to get accurate results.
# The in-place call for that is:
# weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [27]:
# Fresh leaf tensor of ones for the corrected loop.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)
    # Clear the stored gradient in place so the next pass starts from zero.
    weights.grad.zero_()


tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [None]:
## Optimization

# Parameters to optimize (a leaf tensor tracked by autograd)
weights = torch.ones(4, requires_grad=True)

# Create the optimizer. Its first argument must be an iterable of tensors
# (or parameter-group dicts), so the single tensor is wrapped in a list;
# passing the bare tensor raises a TypeError.
optimizer = torch.optim.SGD([weights], lr=0.01)

# Populate weights.grad first: step() has nothing to apply while .grad is None
model_output = (weights * 3).sum()
model_output.backward()

# Apply one update: weights <- weights - lr * weights.grad
optimizer.step()
# Reset the gradients so the next backward() does not accumulate onto them
optimizer.zero_grad()


# Note: the forward pass, backward(), step() and zero_grad() have to run together, in this order, once per training step