## Getting Gradients of an Intermediate Variable in PyTorch

### tensorflow 计算梯度

In [1]:
import tensorflow as tf

g = tf.Graph()
with g.as_default() as g:
    
    x = tf.placeholder(dtype=tf.float32, shape=None, name='x')
    w = tf.Variable(initial_value=2, dtype=tf.float32, name='w')
    b = tf.Variable(initial_value=1, dtype=tf.float32, name='b')
    
    u = x * w
    v = u + b
    a = tf.nn.relu(v)
    
    d_a_x = tf.gradients(a, x)
    d_a_w = tf.gradients(a, w)
    d_a_b = tf.gradients(a, b)
    d_a_u = tf.gradients(a, u)
    d_a_v = tf.gradients(a, v)


with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())
    grads = sess.run([d_a_x, d_a_w, d_a_b, d_a_u, d_a_v], feed_dict={'x:0': 3})

print(grads)


[[2.0], [3.0], [1.0], [1.0], [1.0]]


### pytorch 获取中间梯度

#### via autograd's grad

In [2]:
import torch
import torch.nn.functional as F
from torch.autograd import grad

x = torch.tensor([3.], requires_grad=True)
w = torch.tensor([2.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

u = x * w
v = u + b
a = F.relu(v)

d_a_b = grad(a, b, retain_graph=True)
d_a_u = grad(a, u, retain_graph=True)
d_a_v = grad(a, v, retain_graph=True)
d_a_w = grad(a, w, retain_graph=True)
d_a_x = grad(a, x)
    

for name, grad in zip("xwbuv", (d_a_x, d_a_w, d_a_b, d_a_u, d_a_v)):
    print('d_a_%s:' % name, grad)

d_a_x: (tensor([2.]),)
d_a_w: (tensor([3.]),)
d_a_b: (tensor([1.]),)
d_a_u: (tensor([1.]),)
d_a_v: (tensor([1.]),)


In [3]:
x = torch.tensor([3.], requires_grad=True)
w = torch.tensor([2.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

u = x * w
v = u + b
a = F.relu(v)

partial_derivatives = grad(a, (x, w, b, u, v))

for name, grad in zip("xwbuv", (partial_derivatives)):
    print('d_a_%s:' % name, grad)


TypeError: 'tuple' object is not callable

#### via retain_grad

对于不是通过torch.tensor(requires_grad=True)定义的tensor，pytorch不会存储其grad，需要使用retain_grad来存储梯度

In [4]:
x = torch.tensor([3.], requires_grad=True)
w = torch.tensor([2.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

u = x * w
v = u + b
a = F.relu(v)

u.retain_grad()
v.retain_grad()

a.backward()

for name, var in zip("xwbuv", (x, w, b, u, v)):
    print('d_a_%s:' % name, var.grad)

d_a_x: tensor([2.])
d_a_w: tensor([3.])
d_a_b: tensor([1.])
d_a_u: tensor([1.])
d_a_v: tensor([1.])


#### Using Hooks

不建议使用

In [5]:
import torch
import torch.nn.functional as F


grads = {}
def save_grad(name):
    def hook(grad):
        grads[name] = grad
    return hook


x = torch.tensor([3.], requires_grad=True)
w = torch.tensor([2.], requires_grad=True)
b = torch.tensor([1.], requires_grad=True)

u = x * w
v = u + b

x.register_hook(save_grad('d_a_x'))
w.register_hook(save_grad('d_a_w'))
b.register_hook(save_grad('d_a_b'))
u.register_hook(save_grad('d_a_u'))
v.register_hook(save_grad('d_a_v'))

a = F.relu(v)

a.backward()

grads

{'d_a_v': tensor([1.]),
 'd_a_b': tensor([1.]),
 'd_a_u': tensor([1.]),
 'd_a_x': tensor([2.]),
 'd_a_w': tensor([3.])}