# Automatic differentiation

In [1]:
from mxnet import autograd, np, npx
npx.set_np()

## Simple example

In [2]:
# Create the input vector; the cell output shows it as [0., 1., 2., 3.].
x = np.arange(4)
x

array([0., 1., 2., 3.])

Allocate memory for the gradient of x. The gradient is stored as an attribute (`x.grad`) of the `ndarray` x.

In [3]:
# Attach a gradient buffer to x; it is read back below as `x.grad`.
x.attach_grad()

Access the gradient of x; by default it is initialized to zeros.

In [4]:
# Inspect the freshly attached gradient buffer (all zeros before any backward pass).
x.grad

array([0., 0., 0., 0.])

## Building gradient graph on the fly

$$y = 2x^{T}x$$
$$\frac{dy}{dx} = 4x $$

In [5]:
# Record the computation of y = 2 * <x, x> so that it can be differentiated later.
with autograd.record():
    y = 2 * np.dot(x, x)

In [6]:
# y is a scalar: 2 * (0 + 1 + 4 + 9) = 28.
y

array(28.)

Automatically calculate the gradient of y with respect to each component of x by calling `backward()`.

In [7]:
# Back-propagate from y; the resulting gradient is read from x.grad in the next cell.
y.backward()

Check x grad

In [8]:
x.grad  # dy/dx for y = 2 * dot(x, x); expected to equal 4 * x

array([ 0.,  4.,  8., 12.])

Verify our gradient

In [9]:
# Element-wise comparison of the autograd result with the analytic gradient 4x.
x.grad == 4*x

array([ True,  True,  True,  True])

If we subsequently compute the gradient of another variable whose value was calculated as a function of x, the contents of `x.grad` will be overwritten.

In [10]:
# Differentiate a different function of x. Afterwards x.grad holds
# d(sum(x))/dx (all ones) instead of the previous 4 * x.
with autograd.record():
    y = x.sum()
y.backward()
x.grad

array([1., 1., 1., 1.])

## Backward for Non-Scalar variables

In [11]:
# Call backward() directly on a vector-valued y.
with autograd.record():
    y = x * x # y is a vector
y.backward()

# Reference computation: differentiate the explicit sum of the same
# element-wise product, using an independent copy of x.
u = x.copy()
u.attach_grad() # allocate memory for u's gradient
with autograd.record():
    v = (u*u).sum() # v is a scalar
v.backward()

# Both gradients match (see output): backward() on the vector y gives the
# same result as differentiating the sum of its elements.
x.grad == u.grad

array([ True,  True,  True,  True])

## Detaching computation

In [12]:
with autograd.record():
    y = x * x
    u = y.detach() # treat u as a constant: it carries y's values but is cut off from x
    z = u * x
z.backward() # dz/dx = u, because no gradient flows back through u
print(x.grad == u)
print(x.grad)

[ True  True  True  True]
[0. 1. 4. 9.]


In [13]:
# y itself was recorded in the previous cell, so it can still be differentiated.
y.backward() # dy/dx = 2*x
print(x.grad == 2*x)
print(x.grad)

[ True  True  True  True]
[0. 2. 4. 6.]


Attaching gradients to x implicitly calls `x = x.detach()`.

In [14]:
y = np.ones(4) * 2
y.attach_grad() # allocate y's gradient buffer
with autograd.record():
    u = x * y
    u.attach_grad() # implicitly runs u = u.detach(), cutting u off from x and y
    z = 5 * u - x
# dz/du = 5 and dz/dx = -1. y only reaches z through the detached u, so its
# gradient stays 0 (y acts as a constant).
z.backward()
x.grad, u.grad, y.grad

(array([-1., -1., -1., -1.]), array([5., 5., 5., 5.]), array([0., 0., 0., 0.]))

## Computing the gradient of Python control flow

In [15]:
def f(a):
    """Piecewise-linear function of `a` built with Python control flow.

    `a` is doubled, then doubled again until its norm reaches at least 1000.
    The scaled value is returned unchanged when its element sum is positive,
    and multiplied by 100 otherwise. The result is therefore always
    `k * a` for a constant `k` that depends on the input.

    Args:
        a: Array-like value supporting `*`, `.sum()` and `np.linalg.norm`.

    Returns:
        The scaled array, with the same shape as `a`.
    """
    b = a * 2
    # The lower bound guards against an all-zero input: its norm stays 0 no
    # matter how often it is doubled, so an upper-bound-only test never exits.
    while 0 < np.linalg.norm(b) < 1000:
        b = b * 2
    if b.sum() > 0:
        c = b
    else:
        c = 100 * b
    return c

In [16]:
a = np.random.normal()
# Allocate the gradient buffer for a. f only ever multiplies its input by
# constants, so f(a) = k * a for some k that depends on a; hence
# df/da = k = f(a) / a.
a.attach_grad()

with autograd.record():
    d = f(a)
d.backward()

In [17]:
# The autograd gradient should equal the slope k = d / a.
a.grad == d/a

array(True)

## Training Mode and Prediction Mode

In [18]:
# is_training() reports False outside a record() scope and True inside it
# (see the printed output).
print(autograd.is_training())
with autograd.record():
    print(autograd.is_training())

False
True


# Exercises

## exercise 2

In [19]:
x = np.ones(4) * 2
x.attach_grad()

with autograd.record():
    y = 3 * x

# Ask autograd to keep the recorded graph so backward() can be called again.
y.backward(retain_graph=True)
print(x.grad)

# Second backward pass over the same graph; it succeeds here and prints the
# same gradient. NOTE(review): the original remark on this line was
# "kernel die" - presumably what happens when the first call omits
# retain_graph=True; confirm.
y.backward()
print(x.grad)

[3. 3. 3. 3.]
[3. 3. 3. 3.]


## exercise 3

In [20]:
# `f` is the control-flow function defined in the "Computing the gradient of
# Python control flow" section. It is reused here instead of being redefined
# with an identical body.
# NOTE: a later cell (exercise 5) rebinds the name `f`, so run this cell in
# notebook order.
a = np.random.normal(size=(3, 2))
print(a)

# Allocate the gradient buffer for a. f(a) = k * a for a constant k that
# depends on a, so df/da = k = f(a) / a element-wise.
a.attach_grad()

with autograd.record():
    d = f(a)
d.backward()

a.grad == d/a

[[0.7740038  0.4838046 ]
 [1.0434403  0.29956347]
 [1.1839255  0.15302546]]


array([[ True,  True],
       [ True,  True],
       [ True,  True]])

## exercise 5

In [27]:
# NOTE: this rebinds the name `f`, which earlier cells use for the
# control-flow example.
def f(x):
    """Function to differentiate: sin(x)."""
    return np.sin(x)


def df(x):
    """Analytic derivative of f: cos(x)."""
    return np.cos(x)


x = np.linspace(0, 10, 200)
x.attach_grad()
# Record the vector-valued f(x) and back-propagate from it directly.
with autograd.record():
    y = f(x)
y.backward()

In [29]:
# NOTE: this rebinds the name `f`, which earlier cells use for the
# control-flow example.
def f(x):
    """Function to differentiate: sin(x)."""
    return np.sin(x)


def df(x):
    """Analytic derivative of f: cos(x)."""
    return np.cos(x)


x = np.linspace(0, 10, 200)
x.attach_grad()
with autograd.record():
    # Reduce with the array's own sum() instead of Python's builtin sum(),
    # which would add the 200 elements one at a time.
    y = f(x).sum()
y.backward()

In [33]:
%matplotlib widget
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(8, 8))
plt.subplot(3,1,1)
plt.plot(x, f(x))
plt.title("sin(x)")
plt.subplot(3,1,2)
plt.plot(x, x.grad)
plt.title("grad sin(x) using autograd")
plt.subplot(3,1,3)
plt.plot(x, df(x))
plt.title("cos(x)")
plt.show()

plt.tight_layout()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
x = np.arange(4)
x.attach_grad()
with autograd.record():
    # Adding 0*x turns the scalar 2*dot(x, x) into a 4-element vector
    # (shown when `y` is displayed in the next cell).
    y = 2*np.dot(x,x)  + 0*x
y.backward()
# The displayed gradient is 16 * x: four times the 4 * x obtained for the scalar y.
x.grad

array([ 0., 16., 32., 48.])

In [24]:
# y is now a vector holding the scalar value 28 four times.
y

array([28., 28., 28., 28.])

In [25]:
x = np.arange(4)
x.attach_grad()
with autograd.record():
    # Reduce the broadcast 4-element vector to a scalar with the array's own
    # sum() rather than Python's builtin sum(), which iterates element by element.
    y = (2*np.dot(x,x)  + 0*x).sum()
y.backward()
# Same gradient as calling backward() on the unreduced vector: 16 * x.
x.grad

array([ 0., 16., 32., 48.])

In [26]:
# Scalar total of the four broadcast elements: 4 * 28 = 112.
y

array(112.)