## 2.5 Automaticc Differentiation

In [1]:
from mxnet import autograd,np,npx
npx.set_np()

### 2.5.1 A Simple Example

In [26]:
x=np.arange(4.0)
x, x.grad

(array([0., 1., 2., 3.]), None)

In [28]:
# 为张量分配计算梯度的内存空间
x=np.arange(4.0)
x.attach_grad()
x, x.grad

(array([0., 1., 2., 3.]), array([0., 0., 0., 0.]))

In [3]:
# 对比 分配内存前、分配内存后、反向传播后 y.grad 的值
x=np.arange(4.0)
# 将计算函数置于 `autograd.record` 的范围用于构造计算图
with autograd.record():
    y=2*np.dot(x,x)
y, y.grad, x.grad

(array(28.), None, None)

In [4]:
x=np.arange(4.0)
with autograd.record():
    y=2*np.dot(x,x)
y.attach_grad()
y, y.grad, x.grad

(array(28.), array(0.), None)

In [10]:
# 必须为 x 变量提供计算梯度的内存后才可以对函数 y(x) 进行反向传播计算梯度
x=np.arange(4.0)
x.attach_grad()
with autograd.record():
    # y=2x^2, y'=4x
    y=2*np.dot(x,x)
y.backward()
y, y.grad, x.grad

(array(28.), None, array([ 0.,  4.,  8., 12.]))

In [18]:
with autograd.record():
    y=x.sum()
y.backward()
x.grad

array([1., 1., 1., 1.])

In [21]:
with autograd.record():
    y=x.mean()
x.attach_grad()
x.grad

array([0., 0., 0., 0.])

In [58]:
# 必须为 x 变量提供计算梯度的内存后才可以对函数 y(x) 进行反向传播计算梯度
x=np.arange(4.0)
x.attach_grad()
with autograd.record():
    # y=2x^2, y'=4x
    y=2*x*x
    z=3*y*y   # z'=48x^3
z.backward()
x, x.grad, y, y.grad, z, z.grad

(array([0., 1., 2., 3.]),
 array([   0.,   48.,  384., 1296.]),
 array([ 0.,  2.,  8., 18.]),
 None,
 array([  0.,  12., 192., 972.]),
 None)

In [62]:
x=np.arange(4.0)
x.attach_grad()
print(x,x.grad)
# 必须为 x 变量提供计算梯度的内存后才可以对函数 y(x) 进行反向传播计算梯度
with autograd.record():
    # y=2x^2, y'=4x
    y=2*x*x
    z=3*y*y
y.attach_grad()
print(x, x.grad, y, y.grad, z, z.grad)
z.backward()
print(x, x.grad, y, y.grad, z, z.grad)
x, x.grad, y, y.grad, z, z.grad

[0. 1. 2. 3.] [0. 0. 0. 0.]
[0. 1. 2. 3.] [0. 0. 0. 0.] [ 0.  2.  8. 18.] [0. 0. 0. 0.] [  0.  12. 192. 972.] None
[0. 1. 2. 3.] [   0.   48.  384. 1296.] [ 0.  2.  8. 18.] [0. 0. 0. 0.] [  0.  12. 192. 972.] None


(array([0., 1., 2., 3.]),
 array([   0.,   48.,  384., 1296.]),
 array([ 0.,  2.,  8., 18.]),
 array([0., 0., 0., 0.]),
 array([  0.,  12., 192., 972.]),
 None)

In [59]:
x=np.arange(4.0)
x.attach_grad()
print(x,x.grad)
# 必须为 x 变量提供计算梯度的内存后才可以对函数 y(x) 进行反向传播计算梯度
with autograd.record():
    # y=2x^2, y'=4x
    y=2*x*x
    z=3*y*y
y.attach_grad()
print(x, x.grad, y, y.grad, z, z.grad)
y.backward()
print(x, x.grad, y, y.grad, z, z.grad)
z.backward()
print(x, x.grad, y, y.grad, z, z.grad)
y, y.grad, z, z.grad, x.grad

[0. 1. 2. 3.] [0. 0. 0. 0.]
[0. 1. 2. 3.] [0. 0. 0. 0.] [ 0.  2.  8. 18.] [0. 0. 0. 0.] [  0.  12. 192. 972.] None
[0. 1. 2. 3.] [0. 0. 0. 0.] [ 0.  2.  8. 18.] [1. 1. 1. 1.] [  0.  12. 192. 972.] None
[0. 1. 2. 3.] [   0.   48.  384. 1296.] [ 0.  2.  8. 18.] [1. 1. 1. 1.] [  0.  12. 192. 972.] None


(array([ 0.,  2.,  8., 18.]),
 array([1., 1., 1., 1.]),
 array([  0.,  12., 192., 972.]),
 None,
 array([   0.,   48.,  384., 1296.]))

### 2.5.2 Backward for Non-Scalar Variables

In [70]:
x=np.arange(4.0)
x.attach_grad()
with autograd.record():
    y=x*x
    z=y*x
z.backward()
x, x.grad, y, y.grad, z, z.grad

(array([0., 1., 2., 3.]),
 array([ 0.,  3., 12., 27.]),
 array([0., 1., 4., 9.]),
 None,
 array([ 0.,  1.,  8., 27.]),
 None)

In [68]:
x=np.arange(4.0)
x.attach_grad()
with autograd.record():
    y=x*x
    u=y.detach()
    z=u*x
z.backward()
x, x.grad, y, y.grad, u, u.grad, z, z.grad

(array([0., 1., 2., 3.]),
 array([0., 1., 4., 9.]),
 array([0., 1., 4., 9.]),
 None,
 array([0., 1., 4., 9.]),
 None,
 array([ 0.,  1.,  8., 27.]),
 None)

### 2.5.4 Computing the Gradient of Python Control Flow

In [73]:
def f(x):
    y=x*2
    while np.linalg.norm(y)<1000:
        y=y*2
    if y.sum()>0:
        z=y
    else:
        z=100*y
    print(x,y,z)
    return z

In [77]:
x=np.random.normal()
x.attach_grad()
with autograd.record():
    y=f(x)
y.backward
x, x.grad, y, y.grad

-1.1688148 -1196.8663 -119686.63


(array(-1.1688148), array(0.), array(-119686.63), None)