In [1]:
import torch

In [2]:
x = torch.tensor(2.0, requires_grad=True)
y = x.detach()
print(x)
print(y)

tensor(2., requires_grad=True)
tensor(2.)


In [5]:
# detach() 后对象不一样，但底层数据是共享的
print(id(x))
print(id(y))
print(x.untyped_storage().data_ptr())
print(y.untyped_storage().data_ptr())

1978282318640
1978282320880
6259612909696
6259612909696


In [6]:
print(x)
y.zero_()
print(x)

tensor(2., requires_grad=True)
tensor(0., requires_grad=True)


In [14]:
# 分别对x、y进行后续计算
z1 = x ** 2
z2 = y ** 2
print(z1)
print(z2)

tensor(0., grad_fn=<PowBackward0>)
tensor(0.)


In [15]:
z1.sum().backward()
# z2.sum().backward() # z2没有开启梯度计算，无法进行反向传播

In [17]:
x = torch.ones(2, 2, requires_grad=True)
y = x * x
print(x)
print(y)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[1., 1.],
        [1., 1.]], grad_fn=<MulBackward0>)


In [19]:
# 分离
u = y.detach()

In [20]:
# 让u参与新张量的运算
# z = x ^ 3 --> dz/dx = 3 * x ^ 2
# z = u * x --> dz/dx = u   detach()后视为常熟
z = u * x

In [21]:
# 反向传播，计算所有梯度
z.sum().backward()

In [23]:
print(x.grad)

tensor([[1., 1.],
        [1., 1.]])


In [24]:
print(x.grad == u)

tensor([[True, True],
        [True, True]])


## `detach()` vs `data`

In [35]:
# 用两组数据进行对比
x1 = torch.tensor([1.0, 2, 3], requires_grad=True)
x2 = torch.tensor([1.0, 2, 3], requires_grad=True)
print(x1)
print(x2)

tensor([1., 2., 3.], requires_grad=True)
tensor([1., 2., 3.], requires_grad=True)


In [36]:
y1 = x1.sigmoid()
y2 = x2.sigmoid()
print(y1)
print(y2)

tensor([0.7311, 0.8808, 0.9526], grad_fn=<SigmoidBackward0>)
tensor([0.7311, 0.8808, 0.9526], grad_fn=<SigmoidBackward0>)


In [27]:
# y1.sum().backward()
# print(x1.grad)

tensor([0.1966, 0.1050, 0.0452])


In [28]:
# y2.sum().backward()
# print(x2.grad)

tensor([0.1966, 0.1050, 0.0452])


In [37]:
z1 = y1.data
z2 = y2.detach()
print(z1)
print(z2)  # 目前看来z1和z2完全相同
print(z1.requires_grad)
print(z2.requires_grad)

tensor([0.7311, 0.8808, 0.9526])
tensor([0.7311, 0.8808, 0.9526])
False
False


In [38]:
z1.zero_()
z2.zero_()
print(y1)
print(y2)

tensor([0., 0., 0.], grad_fn=<SigmoidBackward0>)
tensor([0., 0., 0.], grad_fn=<SigmoidBackward0>)


In [40]:
# 对梯度计算的影响
# 使用data，最好只读取，不修改
y1.sum().backward()
print(x1.grad)  # 仍然可以计算，变为0，但是存在问题，只是将数据拿出来一份进行操作，而对原始梯度计算造成了影响

y2.sum().backward()  # 无法计算，直接报错，
print(x2.grad)

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.