# AUTOGRAD: AUTOMATIC DIFFERENTIATION
[AUTOGRAD: AUTOMATIC DIFFERENTIATION](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html#sphx-glr-beginner-blitz-autograd-tutorial-py)

In [1]:
import torch
import numpy
import time
import traceback

# Tensor

* use grad

set the tensor's `.requires_grad` attribute to `True`

call `.backward()`


The gradient for this tensor will be accumulated into `.grad` attribute.

* no grad

prevent tracking history, save memory

wrap the code block in `with torch.no_grad():`

## requires_grad 与 grad_fn属性

### 创建时指定

In [2]:
# Build a 2x2 tensor of ones and flag it so autograd records operations on it.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


### 函数计算结果 与 grad_fn

In [3]:
# An operation on a tracked tensor yields a result that carries a grad_fn.
y = torch.add(x, 2)
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [4]:
# y came from an addition, so its grad_fn is an AddBackward0 node.
print(y.grad_fn)

<AddBackward0 object at 0x7f70939e3898>


In [5]:
# Chain two more operations onto y, then reduce everything to a single value.
z = torch.mul(y * y, 3)
out = torch.mean(z)
# Each result shows the last operation that produced it in its grad_fn.
print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


### 修改requires_grad属性的方法

* a.requires_grad_(True) 函数
* 直接修改属性 a.requires_grad = True
* with torch.no_grad()

In [6]:
# A freshly created tensor is not tracked, and neither is anything computed from it.
a = torch.randn(2, 2)
a = torch.div(a * 3, a - 1)
print(a.requires_grad)
# requires_grad_() switches tracking on in place.
a.requires_grad_()
print(a.requires_grad)
# From here on, results computed from a carry a grad_fn.
b = torch.sum(a * a)
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7f70939f37f0>


In [155]:
# A tensor created with requires_grad=True is tracked from the start.
x = torch.randn(3, requires_grad=True)

print(x.requires_grad)
# A result computed from x is tracked as well ...
print(x.pow(2).requires_grad)

# ... unless the computation runs inside torch.no_grad().
with torch.no_grad():
    print(x.pow(2).requires_grad)

True
True
False


# Gradient 

In [7]:
out.backward()  # backward() without a gradient argument only works on a scalar such as out

# out.backward(retain_graph=True) # keeps the graph buffers so backward can be run again

In [8]:
# x was created with requires_grad=True, so the earlier out.backward() call
# accumulated d(out)/dx into x.grad.
print(x.grad)
try:
    out.backward()  # second call fails: the first backward() already freed the graph buffers
except Exception:
    # Print the error instead of aborting the rest of the cell.
    traceback.print_exc()
print(y)
# NOTE(review): y and z both carry a grad_fn, so they are tracked by autograd;
# their .grad still prints None, presumably because they are intermediate
# (non-leaf) results and autograd only keeps .grad for leaves unless
# retain_grad() is called on them - confirm against the PyTorch docs.
print(y.grad)  # None for the intermediate result y
print(z.grad)  # None for the intermediate result z

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
None
None


Traceback (most recent call last):
  File "<ipython-input-8-1c48c9d7e862>", line 3, in <module>
    out.backward()  # backward一次,buffer会被清空
  File "/opt/conda/lib/python3.6/site-packages/torch/tensor.py", line 107, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/opt/conda/lib/python3.6/site-packages/torch/autograd/__init__.py", line 93, in backward
    allow_unreachable=True)  # allow_unreachable flag
RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.


## autograd

运行在张量上

In [75]:
# Input: a 2-element float tensor that autograd differentiates against.
x = torch.tensor([2.0, 3.0], dtype=torch.float, requires_grad=True)
print(x)

# Fill the outputs element by element:
#   y1 = x1^2 + 3 * x2
#   y2 = 2 * x1 + x2^2
y = torch.zeros(2)
print(y)
y[0] = x[0].pow(2) + x[1] * 3
y[1] = x[0] * 2 + x[1].pow(2)
print(y)

# y is not a scalar, so backward() is given a vector v with one weight per
# output; x.grad then holds v^T times the Jacobian dy/dx.
v = torch.ones(2, dtype=torch.float)
y.backward(gradient=v)
print(x.grad)

tensor([2., 3.], requires_grad=True)
tensor([0., 0.])
tensor([13., 13.], grad_fn=<CopySlices>)
tensor([6., 9.])


$$
x = 
 \left|
 \begin{matrix}
   x_1 \\
   x_2
 \end{matrix}
 \right|
 =
  \left|
 \begin{matrix}
   2 \\
   3
 \end{matrix}
 \right|
$$

$$
y = 
 \left|
 \begin{matrix}
   y_1 \\
   y_2
 \end{matrix}
 \right|
 =
  \left|
 \begin{matrix}
   {x_1}^2 + 3\times x_2 \\
   2 \times x_1 + {x_2}^2
 \end{matrix}
 \right|
 =
   \left|
 \begin{matrix}
   13 \\
   13
 \end{matrix}
 \right|
$$

$$
\frac{\partial y}{ \partial x} = 
 \left|
 \begin{matrix}
   \frac{\partial y_1}{ \partial x_1} & \frac{\partial y_1}{ \partial x_2} \\
   \frac{\partial y_2}{ \partial x_1} & \frac{\partial y_2}{ \partial x_2}
 \end{matrix}
 \right|
 =
  \left|
 \begin{matrix}
   2x_1 & 3\\
   2  & 2x_2
 \end{matrix}
 \right|
 =
   \left|
 \begin{matrix}
   4 & 3\\
   2  & 6
 \end{matrix}
 \right|
$$

$$
v = 
 \left|
 \begin{matrix}
   v_1 \\
   v_2
 \end{matrix}
 \right|
 =
  \left|
 \begin{matrix}
   1 \\
   1
 \end{matrix}
 \right|
$$

$$
v^T \times \frac{\partial y}{ \partial x}
=
  \left|
 \begin{matrix}
   1 &   1
 \end{matrix}
 \right|
 \times
  \left|
 \begin{matrix}
   4 & 3\\
   2  & 6
 \end{matrix}
 \right|
 =
   \left|
 \begin{matrix}
   6 & 9
 \end{matrix}
 \right|
$$

### x.grad 的 shape 始终与 x 一致；传入与 y 形状不同的 v 也不会改变这一点

In [139]:
# Same two-input / two-output function as in the previous example.
x = torch.tensor([2, 3], dtype=torch.float, requires_grad=True)
print(x)
y = torch.zeros(2)
y[0] = x[0]**2 + x[1] * 3
y[1] = x[0] * 2 + x[1]**2
print(y)
# v1 is deliberately 2x2 even though y only has 2 elements.
# NOTE(review): the recorded x.grad of [12, 18] is twice the [6, 9] obtained
# with v = [1, 1], which suggests this PyTorch build summed v1 down to y's
# shape before using it. Newer PyTorch releases are expected to reject the
# shape mismatch with a RuntimeError - confirm on the target version.
v1 = torch.tensor([[1, 1], [1, 1]], dtype=torch.float)
print(v1)
y.backward(v1, retain_graph=True)
print(x.grad)
# retain_graph=True above allows a second backward pass; its gradient is added
# to the existing x.grad, which is why the printed values double.
y.backward(v1, retain_graph=True)
print(x.grad)

tensor([2., 3.], requires_grad=True)
tensor([13., 13.], grad_fn=<CopySlices>)
tensor([[1., 1.],
        [1., 1.]])
tensor([12., 18.])
tensor([24., 36.])


## 如何计算梯度呢？  (这里给向量的例子，其他维度的...）
retain_graph=True 会保留计算图的中间 buffer，因此可以对同一个图多次调用 backward；但每次调用前要手动把 x.grad 置 0，否则新的梯度会累加到旧的梯度上

In [153]:
# Rebuild the two-input / two-output example so that its Jacobian can be
# assembled one row at a time.
x = torch.tensor([2, 3], dtype=torch.float, requires_grad=True)
print(x)
y = torch.zeros(2)
y[0] = x[0]**2 + x[1] * 3
y[1] = x[0] * 2 + x[1]**2
print(y)

# The Jacobian dy/dx has one row per output (y) and one column per input (x).
row, column = y.shape[0], x.shape[0]
print(row, column)
jacobian = torch.zeros(row, column, dtype=torch.float)
# Template for the one-hot vectors handed to y.backward(). It must have the
# length of y (row), not of x (column); the two only coincide here because the
# example is square.
zz = torch.zeros(row, dtype=torch.float)

tensor([2., 3.], requires_grad=True)
tensor([13., 13.], grad_fn=<CopySlices>)
2 2


In [154]:
# Build the Jacobian row by row: back-propagating a one-hot vector that selects
# output r leaves row r of dy/dx in x.grad.
for r in range(row):
    # Important: clear what the previous pass left behind, because backward()
    # adds to x.grad instead of overwriting it.
    if x.grad is not None:
        x.grad.zero_()
    temp_v = zz.clone()
    temp_v[r] = 1
    # retain_graph=True keeps the graph usable for the next row.
    y.backward(gradient=temp_v, retain_graph=True)
    print(x.grad)
    jacobian[r] = x.grad

print(jacobian)

tensor([4., 3.])
tensor([2., 6.])
tensor([[4., 3.],
        [2., 6.]])
