In [1]:
import torch

# 尽量不要自己给自己赋值
## 错误例子：自己给自己赋值

In [2]:
eta1 = torch.nn.Parameter(torch.rand(10), requires_grad=True)
eta1

Parameter containing:
tensor([0.9978, 0.0706, 0.6378, 0.3654, 0.2720, 0.1149, 0.0122, 0.7917, 0.9183,
        0.0512], requires_grad=True)

这里本来想让eta乘以2

In [3]:
eta1 = eta1 * 2

In [4]:
eta1

tensor([1.9955, 0.1412, 1.2757, 0.7307, 0.5439, 0.2298, 0.0243, 1.5833, 1.8366,
        0.1025], grad_fn=<MulBackward0>)

In [5]:
eta1.sum().backward()

结果可以看出没有梯度了

In [6]:
eta1.grad

  return self._grad


## 正确例子：赋值给其他变量名

In [7]:
eta2 = torch.nn.Parameter(torch.rand(10), requires_grad=True)
eta2

Parameter containing:
tensor([0.2090, 0.4850, 0.3451, 0.9455, 0.2797, 0.2960, 0.1259, 0.3914, 0.3573,
        0.1584], requires_grad=True)

In [8]:
eta_temp = eta2 * 2

In [9]:
eta_temp

tensor([0.4179, 0.9700, 0.6903, 1.8910, 0.5594, 0.5921, 0.2518, 0.7829, 0.7146,
        0.3169], grad_fn=<MulBackward0>)

In [10]:
eta_temp.sum().backward()

In [11]:
eta2.grad

tensor([2., 2., 2., 2., 2., 2., 2., 2., 2., 2.])

# 小心直接赋值其他变量
## 错误例子：直接赋值给别的变量后，内存里还是同一个变量

In [12]:
v1 = torch.nn.Parameter(torch.rand(10), requires_grad=True)
v1

Parameter containing:
tensor([0.7064, 0.7076, 0.1082, 0.1147, 0.1358, 0.9714, 0.9704, 0.2618, 0.8911,
        0.3691], requires_grad=True)

把`v1`赋值给`a`

In [13]:
a = v1[:4]

修改`a`的值

In [14]:
a.data.copy_(torch.rand(4))
a

tensor([0.7943, 0.3477, 0.8388, 0.9612], grad_fn=<SliceBackward0>)

发现`v1`的值也被改了

In [15]:
v1

Parameter containing:
tensor([0.7943, 0.3477, 0.8388, 0.9612, 0.1358, 0.9714, 0.9704, 0.2618, 0.8911,
        0.3691], requires_grad=True)

## 正确例子：用克隆的方法

In [16]:
v2 = torch.nn.Parameter(torch.rand(10), requires_grad=True)
v2

Parameter containing:
tensor([0.9978, 0.7118, 0.1021, 0.7963, 0.8129, 0.4852, 0.9747, 0.3141, 0.0183,
        0.8853], requires_grad=True)

In [17]:
b = v2[:4].clone()

In [18]:
b.data.copy_(torch.rand(4))
b

tensor([0.2075, 0.9243, 0.8223, 0.9775], grad_fn=<CloneBackward0>)

In [19]:
v2

Parameter containing:
tensor([0.9978, 0.7118, 0.1021, 0.7963, 0.8129, 0.4852, 0.9747, 0.3141, 0.0183,
        0.8853], requires_grad=True)

# 生成可学习参数时先生成，最后再包装
## 错误例子：先生成Parameter，再变形

In [20]:
t = torch.nn.Parameter(torch.randn(12,1), requires_grad=True)
t

Parameter containing:
tensor([[-1.2056],
        [ 0.4865],
        [-0.0678],
        [ 0.6065],
        [-0.6478],
        [ 1.5539],
        [ 2.1328],
        [-1.8299],
        [-0.1069],
        [-0.2585],
        [ 0.6403],
        [-0.3970]], requires_grad=True)

In [21]:
t = t.view(3,4)
t

tensor([[-1.2056,  0.4865, -0.0678,  0.6065],
        [-0.6478,  1.5539,  2.1328, -1.8299],
        [-0.1069, -0.2585,  0.6403, -0.3970]], grad_fn=<ViewBackward0>)

In [22]:
t.sum().backward()

In [23]:
t.grad

## 正确例子：先全部初始化完成，再包装成Parameter

In [24]:
k = torch.randn(12,1)
k

tensor([[ 0.4591],
        [ 1.1992],
        [-1.8990],
        [ 1.6807],
        [-1.4025],
        [-1.0175],
        [-0.7314],
        [ 0.5964],
        [ 0.8397],
        [ 0.4601],
        [ 0.5969],
        [-1.1249]])

In [25]:
k = k.view(3,4)

In [26]:
kp = torch.nn.Parameter(k, requires_grad=True)

In [27]:
kp.sum().backward()

In [28]:
kp.grad

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

# 警惕组装变量时的错误
## 错误例子：把变量直接拼起来

In [29]:
a = torch.nn.Parameter(torch.randn(5), requires_grad=True)

In [30]:
a0 = a[0] * 2
a1 = torch.log(a[1]+1)
a2 = a[2] ** 2
a3 = torch.nn.functional.softplus(a[3] * 2)
a4 = a[4]
a0, a1, a2, a3, a4

(tensor(2.7823, grad_fn=<MulBackward0>),
 tensor(nan, grad_fn=<LogBackward0>),
 tensor(0.0822, grad_fn=<PowBackward0>),
 tensor(0.9989, grad_fn=<SoftplusBackward0>),
 tensor(1.0601, grad_fn=<SelectBackward0>))

组合成一个新变量

In [31]:
A1 = torch.tensor([a0, a1, a2, a3, a4])
A1

tensor([2.7823,    nan, 0.0822, 0.9989, 1.0601])

由于重新用`tensor`包装了变量，切断了反向传播

In [32]:
A1.sum().backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

## 正确例子：建立一个变量，然后把值填进去

In [33]:
a

Parameter containing:
tensor([ 1.3911, -1.0731,  0.2867,  0.2698,  1.0601], requires_grad=True)

In [34]:
A2 = torch.zeros([5])

In [35]:
A2[0] = a[0] * 2
A2[1] = torch.log(a[1]+1)
A2[2] = a[2] ** 2
A2[3] = torch.nn.functional.softplus(a[3] * 2)
A2[4] = a[4]
A2

tensor([2.7823,    nan, 0.0822, 0.9989, 1.0601], grad_fn=<CopySlices>)

就可以使用了

In [36]:
A2.sum().backward()

In [37]:
a.grad

tensor([  2.0000, -13.6835,   0.5734,   1.2634,   1.0000])