In [45]:
import torch as t
from torch.autograd import Variable as V
import matplotlib.pyplot as plt

In [32]:
# requires_grad defaults to False. Once any leaf has requires_grad=True,
# every node computed from it has requires_grad=True as well.
a = V(t.ones(3, 4), requires_grad=True)
b = V(t.ones(3, 4))
c = a + b
d = c.sum()  # reduce to a scalar so backward() needs no gradient argument
d.backward()  # back-propagation
# a is a leaf, so a.grad is populated; c requires grad but, as a non-leaf, keeps no .grad.
a.grad, c.requires_grad, c.grad is None

(tensor([[ 1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.],
         [ 1.,  1.,  1.,  1.]]), True, True)

In [33]:
# a and b were created directly, so they are leaves; c is the result of an operation, so it is not.
tuple(node.is_leaf for node in (a, b, c))

(True, True, False)

In [15]:
# c.data is the underlying tensor detached from the graph; summing c itself stays tracked by autograd.
t.sum(c.data), t.sum(c)

(tensor(24.), tensor(24.))

In [44]:
# Example: compare the gradient autograd computes for y = x**2 with the hand-derived gradient 2*x.
def f(x):
    """Return ``x`` squared (element-wise when ``x`` is a tensor)."""
    return x ** 2
def gradf(x):
    """Return the hand-derived derivative of ``x**2`` at ``x``, i.e. ``2 * x``."""
    return 2 * x
x = V(t.randn(2, 3), requires_grad=True)
y = f(x)
print(y)
# y is not a scalar, so backward() needs an upstream gradient with y's shape.
y.backward(t.ones(y.shape))
# Gradient from autograd next to the analytic one; the two should match.
x.grad, gradf(x)

tensor([[ 0.5345,  0.0538,  1.2794],
        [ 0.0284,  0.0145,  0.9274]])


(tensor([[ 1.4622,  0.4641, -2.2622],
         [-0.3371, -0.2405,  1.9260]]), tensor([[ 1.4622,  0.4641, -2.2622],
         [-0.3371, -0.2405,  1.9260]]))

In [48]:
# Build the small graph z = w * x + b used by the following cells.
x = V(t.ones(1))                      # input; requires_grad keeps its default (False)
b = V(t.rand(1), requires_grad=True)  # leaf that requires grad
w = V(t.rand(1), requires_grad=True)  # leaf that requires grad
y = w * x
z = y + b

In [52]:
# grad_fn is the backward function that produced a tensor; grad_fn.next_functions
# links it to the backward functions of that tensor's inputs.
y.requires_grad, z.requires_grad, z.grad_fn, y.grad_fn  # backward nodes of z and y

(True, True, <AddBackward1 at 0x2552a80ec88>, <MulBackward1 at 0x255299c5b38>)

In [53]:
# Entry [0][0] is y.grad_fn; entry [1][0] is the AccumulateGrad node for the leaf b.
z.grad_fn.next_functions

((<MulBackward1 at 0x255299c5b38>, 0), (<AccumulateGrad at 0x255299c5518>, 0))

In [54]:
# Entry [0][0] is the AccumulateGrad node for the leaf w; entry [1][0] is None because x does not require grad.
y.grad_fn.next_functions

((<AccumulateGrad at 0x255299c50f0>, 0), (None, 0))

In [55]:
# Leaves are not the result of any operation, so their grad_fn is None.
w.grad_fn, x.grad_fn

(None, None)

In [59]:
# retain_graph=True keeps the graph's buffers alive, so backward() can be called again.
# Each backward() call adds to .grad instead of overwriting it: dz/dw = x = 1 per pass,
# so w.grad is 2 after the two passes below. Both passes are written out explicitly so
# the result does not depend on how many times this cell happened to be executed.
z.backward(retain_graph=True)
z.backward(retain_graph=True)
w.grad

tensor([ 2.])

In [60]:
# PyTorch builds its computation graph dynamically: every forward pass re-creates the graph, so ordinary Python control flow (if, while, for) can be used.

In [65]:
x = V(t.ones(3), requires_grad=True)
w = V(t.ones(3), requires_grad=True)
y = x * w
z = y.sum()
# y is a non-leaf tensor, so its gradient is not kept after backward().
# To inspect dz/dy, use autograd.grad() (as here) or register a hook on y.
t.autograd.grad(outputs=z, inputs=y)

(tensor([ 1.,  1.,  1.]),)

In [67]:
def hook1(grad):
    """Print the gradient passed to the hook, under a "y's grad:" heading."""
    print("y's grad:\n", grad)
x = V(t.ones(3), requires_grad=True)
w = V(t.ones(3), requires_grad=True)
y = x * w
# hook1 is called with y's gradient while backward() runs.
hook_handle = y.register_hook(hook1)
z = y.sum()
z.backward()
# Unregister the hook once it is no longer needed.
hook_handle.remove()

y's grad:
 tensor([ 1.,  1.,  1.])


In [68]:
# The gradient argument of backward(): with z = y.sum(), z.backward() is
# equivalent to y.backward(grad_y) where grad_y is a tensor of ones shaped like y.
# arange is given an explicit float dtype: integer arguments produce an integer
# tensor in current PyTorch, and integer tensors cannot require gradients.
x = V(t.arange(0, 3, dtype=t.float), requires_grad=True)
y = x**2 + x*2
z = y.sum()
z.backward()
x.grad  # dy/dx = 2*x + 2

tensor([ 2.,  4.,  6.])

In [71]:
# Same computation as z.backward(), but calling backward() on the non-scalar y
# with an explicit upstream gradient.
# arange is given an explicit float dtype: integer arguments produce an integer
# tensor in current PyTorch, and integer tensors cannot require gradients.
x = V(t.arange(0, 3, dtype=t.float), requires_grad=True)
y = x**2 + x*2
z = y.sum()  # not used below; kept so this cell defines the same names as the z.backward() version
# The upstream gradient must have exactly y's shape; ones correspond to d(y.sum())/dy.
grad_y = V(t.ones(y.size()))
y.backward(grad_y)
x.grad  # dy/dx = 2*x + 2

tensor([ 2.,  4.,  6.])