In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Random 1-D tensor of three values; requires_grad=True makes autograd record
# every operation applied to it so gradients can be computed later.
x = torch.rand((3,), requires_grad=True)
x

tensor([0.0196, 0.9446, 0.8036], requires_grad=True)

In [3]:
# Element-wise addition; because x is tracked, the result carries a grad_fn
# that points back to the add operation.
y = 2 + x
print(y)

tensor([2.0196, 2.9446, 2.8036], grad_fn=<AddBackward0>)


In [4]:
# Reduce 2 * y^2 to a single scalar with mean() so that backward() can later
# be called on it without an explicit gradient argument.
z = (y * y * 2).mean()
print(z)

tensor(13.7396, grad_fn=<MeanBackward0>)


In [5]:
# Backpropagate from the scalar z; autograd stores dz/dx in x.grad.
z.backward()
print(x.grad)

tensor([2.6927, 3.9261, 3.7381])


In [6]:
# z1 is NOT a scalar (one entry per element of y). Calling backward() with no
# arguments on it is an error; that error is the point of this cell, so catch
# and print it instead of letting it abort a Restart & Run All.
z1 = 2 * y**2
print(z1)
try:
    z1.backward()
    print(x.grad)  # not reached: the call above raises for non-scalar outputs
except RuntimeError as err:
    print(f"RuntimeError: {err}")

tensor([ 8.1572, 17.3413, 15.7204], grad_fn=<MulBackward0>)


RuntimeError: grad can be implicitly created only for scalar outputs

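Why does this happen? `backward()` can create the initial gradient implicitly only when the output is a scalar; for a non-scalar tensor such as `z1`, a `gradient` tensor of the same shape must be passed explicitly. See https://discuss.pytorch.org/t/loss-backward-raises-error-grad-can-be-implicitly-created-only-for-scalar-outputs/12152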

In [7]:
# For a non-scalar output, backward() needs a gradient tensor of the same
# shape as the output; here a random vector v plays that role.
z1 = 2 * y**2
print(z1)
v = torch.rand(len(z1))
z1.backward(gradient=v)
# x.grad is not reset between backward calls, so the value printed here also
# contains the gradient left over from the earlier z.backward().
print(x.grad)

tensor([ 8.1572, 17.3413, 15.7204], grad_fn=<MulBackward0>)
tensor([ 9.5471, 14.0978, 12.3998])


How to prevent gradients from being calculated:

 - `x.requires_grad_(False)`
 - `x.detach()`
 - `with torch.no_grad():`

In [8]:
# Switch gradient tracking off and back on, in place, on the same tensor.
print(x)
x.requires_grad_(requires_grad=False)  # tracking off: the repr drops requires_grad=True
print(x)
# Re-enable tracking; as the cell's last expression the returned tensor is displayed.
x.requires_grad_()

tensor([0.0196, 0.9446, 0.8036], requires_grad=True)
tensor([0.0196, 0.9446, 0.8036])


tensor([0.0196, 0.9446, 0.8036], requires_grad=True)

In [9]:
# detach() yields a tensor with the same values that autograd does not track;
# x itself keeps requires_grad=True.
# NOTE: this rebinds y, which earlier cells used for x + 2.
y = x.detach()
print(x, y, sep="\n")

tensor([0.0196, 0.9446, 0.8036], requires_grad=True)
tensor([0.0196, 0.9446, 0.8036])


In [10]:
# Operations executed inside no_grad() are not recorded by autograd, so the
# result y1 has no grad_fn even though x requires gradients.
with torch.no_grad():
    y1 = x + 2
print(y1)

tensor([2.0196, 2.9446, 2.8036])


In [26]:
# Dummy "model parameters": four ones, tracked by autograd.
weights = torch.ones((4,), requires_grad=True)
print(weights)

tensor([1., 1., 1., 1.], requires_grad=True)


In [27]:
# Three backward passes WITHOUT resetting weights.grad in between: each pass
# adds its gradient (all ones for a sum) to what is already stored, so the
# printed values grow 1 -> 2 -> 3.
for epoch in range(3):
    model_output = weights.sum()
    model_output.backward()
    print(weights.grad)

tensor([1., 1., 1., 1.])
tensor([2., 2., 2., 2.])
tensor([3., 3., 3., 3.])


In [29]:
# Clear whatever earlier backward passes accumulated. A tensor that has never
# been through backward() has grad == None, so guard the reset to keep this
# cell runnable even directly after `weights` is (re)created.
if weights.grad is not None:
    weights.grad.zero_()

for epoch in range(3):
    model_output = weights.sum()
    model_output.backward()
    print(weights.grad)
    # Reset in place so the next iteration starts from zero instead of
    # accumulating on top of this one.
    weights.grad.zero_()

tensor([1., 1., 1., 1.])
tensor([1., 1., 1., 1.])
tensor([1., 1., 1., 1.])


In [37]:
# Show the raw values without autograd tracking. detach() is used instead of
# the legacy `.data` attribute: in-place changes made through `.data` are
# invisible to autograd and can silently produce wrong gradients.
weights.detach()

tensor([1., 1., 1., 1.])

In [41]:
# Fresh parameter tensor plus an SGD optimizer that manages it.
weights = torch.ones(4, requires_grad=True)
optim = torch.optim.SGD([weights], lr=0.1)

# step() only updates parameters whose .grad has been populated, so run a
# forward/backward pass first; without it the step would be a silent no-op.
model_output = weights.sum()
model_output.backward()

optim.step()       # weights <- weights - lr * weights.grad
optim.zero_grad()  # clear the gradients before the next iteration