In [1]:
import torch

### Tensor

In [19]:
# Build a 2x2 tensor of ones and ask autograd to track operations on it.
x = torch.ones(2, 2, requires_grad=True)
# A tensor created directly by the user has no grad_fn.
(x, x.grad_fn)

(tensor([[1., 1.],
         [1., 1.]], requires_grad=True),
 None)

In [20]:
# Adding a constant to a tracked tensor yields a result whose grad_fn records
# the operation that produced it.
y = torch.add(x, 2)
(y, y.grad_fn)

(tensor([[3., 3.],
         [3., 3.]], grad_fn=<AddBackward0>),
 <AddBackward0 at 0x21443babfa0>)

In [21]:
# x was created directly by the user (leaf); y is the result of an operation on x (non-leaf).
x.is_leaf, y.is_leaf

(True, False)

In [22]:
# z is 3 * y^2 element-wise; out reduces z to its scalar mean.
z = (y * y) * 3
out = z.mean()
(z, out)

(tensor([[27., 27.],
         [27., 27.]], grad_fn=<MulBackward0>),
 tensor(27., grad_fn=<MeanBackward0>))

In [23]:
# requires_grad defaults to False, so nothing computed from `a` is tracked yet.
a = torch.randn(3, 3)
a = (a * 3) / (a - 1)
print(a.requires_grad, a.grad_fn)

# Switch tracking on in place; `a` remains a leaf, so grad_fn is still None.
a.requires_grad_()
print(a.requires_grad, a.grad_fn)

# Results computed from a tracked tensor are tracked and carry a grad_fn.
b = torch.sum(a * a)
print(b.requires_grad, b.grad_fn)

False None
True None
True <SumBackward0 object at 0x000002144C5B5BD0>


### 梯度
调用 `Tensor.backward()` 开始反向传播，计算梯度并将其累加到叶子张量的 `.grad` 属性中。

In [24]:
# out is a scalar, so backward() is called without an explicit gradient argument.
out.backward()
# out = mean(3 * (x + 2)^2) over 4 elements, so d(out)/dx = (1/4) * 6 * (x + 2) = 4.5 where x = 1.
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [26]:
# Gradients accumulate: each backward() call adds into x.grad instead of
# overwriting it, so the first print shows the previous gradient plus 1.
out2 = x.sum()
out2.backward()
print(x.grad)

# Clear the accumulated gradient in place before the next backward pass.
# x.grad is an ordinary tensor that autograd is not tracking here, so zero_()
# can be called on it directly; the legacy `.data` indirection is unnecessary.
x.grad.zero_()
out3 = x.sum()
out3.backward()
print(x.grad)

tensor([[5.5000, 5.5000],
        [5.5000, 5.5000]])
tensor([[1., 1.],
        [1., 1.]])


In [37]:
# y doubles x element-wise; z is the same data viewed as a 2x2 matrix.
x = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True)
y = x * 2
z = y.view((2, 2))
# Two candidate weight tensors with the same shape as z. Only u is passed to
# backward() below; v is defined but not used in this cell.
u = torch.ones(2, 2, dtype=torch.float32)
v = torch.tensor([[1.0, 0.1], [0.01, 0.001]], dtype=torch.float32)

# z is not a scalar, so backward() is given a tensor of z's shape to weight it.
z.backward(gradient=u)
print(x.grad)

tensor([2., 2., 2., 2.])


In [39]:
x = torch.tensor([1.0], requires_grad=True)
y1 = x.pow(2)
# Operations executed inside no_grad() are not recorded by autograd,
# so y2 does not require grad even though x does.
with torch.no_grad():
    y2 = x.pow(3)
# y3 still requires grad because one of its inputs (y1) does.
y3 = y1 + y2
tuple(t.requires_grad for t in (x, y1, y2, y3))

(True, True, False, True)

In [40]:
y3.backward()
x.grad # y2 was computed under no_grad and carries no gradient, so x.grad only reflects y1 = x**2, i.e. 2*x = 2

tensor([2.])

In [47]:
# Operating on `.data` changes a tensor's values without autograd recording
# the operation, so the change does not take part in backpropagation.
x = torch.tensor([1.0], requires_grad=True)

# Tracked in-place update: y = (2 * x) * 2, so dy/dx = 4.
y = 2 * x
y *= 2
y.backward()
print(y, x.grad)

# Untracked update through `.data`: z's value becomes 4, but autograd only
# knows z = 2 * x, so dz/dx = 2.
# Caution: autograd cannot detect `.data` edits, so gradients would be silently
# wrong if the modified values were needed by the backward pass.
z = 2 * x
z.data *= 2
# Reset the gradient accumulated by y.backward(). x.grad is not tracked by
# autograd here, so it can be zeroed directly without going through `.data`.
x.grad.zero_()
z.backward()
print(z, x.grad)

tensor([4.], grad_fn=<MulBackward0>) tensor([4.])
tensor([4.], grad_fn=<MulBackward0>) tensor([2.])
