# Gradient Computation

## Gradient

In [1]:
import torch

In [2]:
# Leaf tensor holding 3 standard-normal samples. requires_grad=True asks
# autograd to record every operation applied to it.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([ 1.3567,  0.8755, -0.2205], requires_grad=True)


In [3]:
# Element-wise addition; the result carries grad_fn=<AddBackward0>, which
# links it back to x in the computation graph.
y = torch.add(x, 2)
print(y)

tensor([3.3567, 2.8755, 1.7795], grad_fn=<AddBackward0>)


<img src="images/1.jpg" width=600>

In [4]:
# z = 2 * y^2, computed element-wise; the last recorded op is a multiplication.
z = 2 * (y * y)
print(z)

tensor([22.5349, 16.5370,  6.3329], grad_fn=<MulBackward0>)


In [5]:
# Reduce z to a scalar (the mean of its elements) so that backward() can be
# called on it without an explicit gradient argument.
z = torch.mean(z)
print(z)

tensor(15.1349, grad_fn=<MeanBackward0>)


In [6]:
# Backpropagate from the scalar z; autograd stores dz/dx in x.grad.
# Here z = mean(2 * (x + 2)**2) over 3 elements, so dz/dx = 4 * (x + 2) / 3.
z.backward() # computes dz/dx; fails if nothing in z's graph has requires_grad=True
print(x.grad)

tensor([4.4756, 3.8340, 2.3726])


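For a non-scalar output, `backward()` cannot create the gradient implicitly. Pass a `gradient` tensor with the same shape as the output, and autograd computes the vector-Jacobian product: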

In [7]:
# Build a non-scalar output z = 2 * (x + 2)^2 (one value per element of x).
x = torch.randn(3, requires_grad=True)
y = x + 2
z = y * y * 2
print(z)

# backward() with no argument is only defined for scalar outputs, so this
# call is expected to raise; the error is caught and shown on purpose.
try:
    z.backward()
except RuntimeError as e:
    print("Error: ", e)

# For a non-scalar output, pass a vector v with the same shape as z.
# Autograd then computes the vector-Jacobian product, i.e. x.grad = J^T v,
# which here equals v * 4 * (x + 2) element-wise.
# This is ordinary follow-up code, not cleanup, so it lives after the
# try/except rather than in a `finally` clause (where it would also run while
# an unrelated exception is propagating).
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float)
z.backward(v)
print(x.grad)

tensor([7.1237, 9.5409, 8.9240], grad_fn=<MulBackward0>)
Error:  grad can be implicitly created only for scalar outputs
tensor([7.5492e-01, 8.7365e+00, 8.4494e-03])


## Prevent Gradient

In [8]:
# Option 1: switch gradient tracking off in place on the tensor itself.
x = torch.randn((3,), requires_grad=True)
print(x)
# The trailing underscore marks an in-place method: x is the same tensor
# afterwards, but it no longer has requires_grad set.
x.requires_grad_(requires_grad=False)
print(x)

tensor([-1.1390,  0.0124, -0.4944], requires_grad=True)
tensor([-1.1390,  0.0124, -0.4944])


In [9]:
# Option 2: detach() returns a new tensor with the same values that is not
# part of the computation graph; rebinding x drops the tracked original.
x = torch.randn((3,), requires_grad=True)
print(x)
x = x.detach()
print(x)

tensor([-0.5216, -0.3057,  1.9626], requires_grad=True)
tensor([-0.5216, -0.3057,  1.9626])


In [10]:
# Option 3: leave x tracked, but compute inside a no_grad() context.
x = torch.randn((3,), requires_grad=True)

# Operations executed inside torch.no_grad() are not recorded by autograd,
# so y has no grad_fn even though x still requires grad.
with torch.no_grad():
    y = 2 * x
print(y)

tensor([ 3.7750,  1.7014, -0.7601])


## Gradient Accumulates While Training

In [11]:
# Every backward() call ADDS to weights.grad rather than overwriting it.
# d(sum(3 * w))/dw = 3 per element, so the printed gradient grows by 3 each
# epoch because it is never reset.
weights = torch.ones(3, requires_grad=True)
for epoch in range(3):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3.])
tensor([6., 6., 6.])
tensor([9., 9., 9.])


In [12]:
# Same loop as before, but the gradient buffer is cleared after each epoch,
# so every iteration reports the gradient of that iteration only (3 per element).
weights = torch.ones(3, requires_grad=True)
for epoch in range(3):
    model_output = torch.sum(3 * weights)
    model_output.backward()
    print(weights.grad)
    # Reset the accumulated gradient in place before the next backward pass.
    weights.grad.zero_()

tensor([3., 3., 3.])
tensor([3., 3., 3.])
tensor([3., 3., 3.])
