In [1]:
# Automatic differentiation with torch.autograd: build a one-layer model
# (z = x @ w + b) and a loss so that later cells can inspect its gradients.
import torch

# Fixed seed so the randomly initialised parameters -- and therefore every
# gradient printed further down -- are identical after "Restart & Run All".
SEED = 0
torch.manual_seed(SEED)

x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
# requires_grad=True asks autograd to track operations on w and b so that
# gradients with respect to them can be computed later.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w) + b  # raw scores fed to the "with_logits" loss below
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

In [3]:
# grad_fn is the backward function autograd recorded for the operation that
# produced each tensor.
z_grad_fn = z.grad_fn
loss_grad_fn = loss.grad_fn
print('Gradient function for z =', z_grad_fn)
print('Gradient function for loss =', loss_grad_fn)

Gradient function for z = <AddBackward0 object at 0x000001C67B2F6F70>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x000001C67B2F6FD0>


In [4]:
# Compute gradients: backpropagate from the loss, then show the gradient
# stored on each parameter (w first, then b).
loss.backward()
for param in (w, b):
    print(param.grad)

tensor([[0.3326, 0.3153, 0.3318],
        [0.3326, 0.3153, 0.3318],
        [0.3326, 0.3153, 0.3318],
        [0.3326, 0.3153, 0.3318],
        [0.3326, 0.3153, 0.3318]])
tensor([0.3326, 0.3153, 0.3318])


In [5]:
# Disable gradient tracking.
# Computed normally, z is tracked because w and b require gradients.
z = x @ w + b
print(z.requires_grad)

# The same computation inside torch.no_grad() yields an untracked tensor.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

True
False


In [6]:
# Another way to disable tracking: detach() gives a tensor that no longer
# requires gradients.
z = x.matmul(w) + b
z_det = z.detach()
print(z_det.requires_grad)

False


In [7]:
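# Why disable gradient tracking?
# - To freeze some parameters of a neural network, which is a common technique when fine-tuning a pretrained model.
# - To speed up computation when only a forward pass is needed, since operations on tensors that do not track gradients are cheaper.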

In [8]:
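# Computational graph
# autograd keeps a record of data (tensors) and all executed operations (along with the resulting new tensors) in a directed acyclic graph (DAG) made of Function objects. In this DAG the leaves are the input tensors and the roots are the output tensors. By tracing the graph from roots to leaves, gradients can be computed automatically with the chain rule.
# Forward pass: autograd computes the resulting tensor and keeps each operation's gradient function in the DAG.
# Backward pass: autograd computes the gradients from each .grad_fn, accumulates them in the corresponding tensor's .grad attribute, and propagates them to the leaf tensors using the chain rule.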

In [9]:
# Jacobian product: `out` is not a scalar, so backward() is given a tensor of
# the same shape as `inp` (all ones here) to multiply the Jacobian with.
inp = torch.eye(5, requires_grad=True)
out = (inp + 1) ** 2
upstream = torch.ones_like(inp)

# First backward pass populates inp.grad.
out.backward(upstream, retain_graph=True)
print("First call\n", inp.grad)

# A second pass adds to the stored gradient instead of replacing it.
out.backward(upstream, retain_graph=True)
print("\nSecond call\n", inp.grad)

# Zero the accumulated gradient in place so the next pass starts from zero.
inp.grad.zero_()
out.backward(upstream, retain_graph=True)
print("\nCall after zeroing gradients\n", inp.grad)

First call
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])

Second call
 tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])

Call after zeroing gradients
 tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
