In [1]:
import torch

# Backpropagation on scalars
# torch.tensor is the supported factory; the float literal keeps x a float32 tensor
x = torch.tensor([2.0])

# autograd only tracks tensors created with requires_grad=True
w = torch.randn(1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

# Forward pass, computed step by step so each node of the graph has a name
y = torch.mul(x, w)
z = torch.add(y, b)

# Which tensors are leaf nodes (is_leaf)?
print('x,y,z,w,b是否为叶子节点:{},{},{},{},{}'.format(x.is_leaf, y.is_leaf, z.is_leaf, w.is_leaf, b.is_leaf))

# requires_grad of the leaf tensors x, w, b
print('x,w, b 的属性为{},{},{}'.format(x.requires_grad, w.requires_grad, b.requires_grad))

# requires_grad of the non-leaf tensors: True because they depend on w and b
print('y, z 的 require_grad属性：{},{}'.format(y.requires_grad, z.requires_grad))

# grad_fn of the leaf tensors (they were not produced by an operation)
print('x, w, b grad_fn is {},{},{}'.format(x.grad_fn, w.grad_fn, b.grad_fn))

# grad_fn of the non-leaf tensors (the operation that produced each of them)
print('y, z grad_fn is {},{}'.format(y.grad_fn, z.grad_fn))

# Backward pass.
# A plain z.backward() frees the graph's buffers when it finishes, so a second
# backward through the same graph raises a RuntimeError. retain_graph=True keeps
# the buffers alive. backward() never resets gradients: each call adds into the
# leaves' .grad.
z.backward(retain_graph=True)

# Leaf parameters: dz/dw = x and dz/db = 1
print('w, b grad is{},{}'.format(w.grad, b.grad))
# Non-leaf tensors y and z do not keep their gradient, so both print None
print('y, z grad is {}, {}'.format(y.grad, z.grad))

x,y,z,w,b是否为叶子节点:True,False,False,True,True
x,w, b 的属性为False,True,True
y, z 的 require_grad属性：True,True
x, w, b grad_fn is None,None,None
y, z grad_fn is <MulBackward0 object at 0x111b16e50>,<AddBackward0 object at 0x111b16ed0>
w, b grad istensor([2.]),tensor([1.])
y, z grad is None, None


In [3]:
# Run autograd's backward pass once more on the graph built earlier.
# This call does not pass retain_graph=True. If the graph's buffers have already
# been freed by a previous backward pass, PyTorch raises a RuntimeError; passing
# retain_graph=True to that earlier call avoids it.
# The error is what this cell demonstrates, so it is caught and displayed instead
# of aborting a "Run All".
try:
    z.backward()
except RuntimeError as err:
    print('RuntimeError: {}'.format(err))
# Inspect the gradients of the parameters
print('参数w,b的梯度为:{},{}'.format(w.grad, b.grad))

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

计算下面这个函数的导函数：
$$
y = x^2 \cdot e^x
$$
它的导函数是：
$$
\frac{dy}{dx} = 2x \cdot e^x + x^2 \cdot e^x
$$
下面比较 autograd 的计算结果与手动求导结果之间的误差。

In [58]:
import torch as t 
def f(x):
    """Evaluate y = x^2 * e^x element-wise on the tensor ``x``."""
    squared = x ** 2
    return squared * t.exp(x)
def gradf(x):
    """Hand-derived derivative of f: dy/dx = 2x * e^x + x^2 * e^x (element-wise)."""
    exp_x = t.exp(x)
    dy_dx = 2 * x * exp_x + x ** 2 * exp_x
    return dy_dx

# Try it on a random 3x4 tensor that autograd tracks
x = t.randn(3, 4, requires_grad=True)
y = f(x)
# y is not a scalar, so backward needs an upstream gradient of the same shape;
# all ones yields dy/dx for every element
upstream = t.ones(y.shape)
y.backward(upstream)

# Compare the autograd result with the hand-derived formula
print('grad by torch-autograd:', x.grad)
print('grad by def:', gradf(x))

grad by torch-autograd: tensor([[-4.4350e-01,  6.9511e+00, -4.1923e-01, -2.3527e-02],
        [ 1.1022e+00, -1.7225e-01,  3.1054e+00, -2.1411e-01],
        [ 2.2655e-01,  4.5613e+01,  1.4302e+00,  3.2563e-01]])
grad by def: tensor([[-4.4350e-01,  6.9511e+00, -4.1923e-01, -2.3527e-02],
        [ 1.1022e+00, -1.7225e-01,  3.1054e+00, -2.1411e-01],
        [ 2.2655e-01,  4.5613e+01,  1.4302e+00,  3.2563e-01]],
       grad_fn=<AddBackward0>)


In [60]:
# Build a small computation graph for inspecting grad_fn
x = t.ones(1)
b = t.rand(1, requires_grad=True)
w = t.rand(1, requires_grad=True)
# z = w * x + b, kept as two steps so y.grad_fn and z.grad_fn can both be inspected
y = w * x
z = y + b

In [84]:
# grad_fn.next_functions is a tuple of pairs; the first element of each pair is the
# backward function of one input, which lets us walk the dynamic graph backwards.
print(z.grad_fn)
print(z.grad_fn.next_functions)
# The first input of z is y, so its entry should be y's grad_fn. Print the check:
# as a bare expression in the middle of a cell its result would not be displayed.
print(z.grad_fn.next_functions[0][0] == y.grad_fn)

# retain_graph=True keeps the graph's buffers so backward can be called repeatedly.
# Gradients accumulate: every execution of this cell adds dz/dw = x = 1 to w.grad,
# so the displayed value grows each time the cell is re-run.
z.backward(retain_graph=True)
w.grad

<AddBackward0 object at 0x12fc0b290>
((<MulBackward0 object at 0x12fc0bb90>, 0), (<AccumulateGrad object at 0x12fc0bc10>, 0))


tensor([24.])

In [88]:
# At test/inference time no backward pass is needed, so autograd can be switched
# off to save memory (including GPU memory).
# Option 1, global switch: t.set_grad_enabled(False). It stays off for every later
# cell until t.set_grad_enabled(True) is called, so it is deliberately not run here.
# Option 2, scoped switch: autograd is disabled only inside the with-block.
with t.no_grad():
    x = t.ones(1)
    w = t.rand(1, requires_grad=True)
    y = x.mul(w)
# y would normally require grad because the leaf w does; it is False here because
# the product was computed while autograd was disabled.
print(x.requires_grad, w.requires_grad, y.requires_grad)

False True False


In [129]:
# Changing a tensor's values without involving autograd
x = t.rand(10, requires_grad=True)
print(x)
# .data exposes the same values as a tensor that is outside the computation graph
print(x.data)

# Scale the values in place through .data; x itself still has requires_grad=True
x.data.mul_(100)
print(x)

tensor([0.0893, 0.1730, 0.1023, 0.2946, 0.5129, 0.2371, 0.1064, 0.9711, 0.8577,
        0.0347], requires_grad=True)
tensor([0.0893, 0.1730, 0.1023, 0.2946, 0.5129, 0.2371, 0.1064, 0.9711, 0.8577,
        0.0347])
tensor([ 8.9289, 17.2977, 10.2297, 29.4627, 51.2926, 23.7130, 10.6423, 97.1148,
        85.7733,  3.4686], requires_grad=True)


In [None]:
# 非叶子节点的导数计算完之后即被清空,使用autograd.grad函数,hook函数，查看梯度

