In [2]:
##
"""
Goal: differentiate y = 2 * x^T x (twice the dot product of x with itself)
with respect to the column vector x.
"""
import torch

# Float input vector [0., 1., 2., 3.]; autograd only tracks floating-point tensors.
x = torch.arange(0.0, 4.0)
x

tensor([0., 1., 2., 3.])

In [3]:
# Switch on gradient tracking for x in place (the flag argument defaults to True).
x.requires_grad_()
# Nothing is displayed for this cell: x.grad stays None until backward() has run.
x.grad

In [4]:
# Scalar objective y = 2 * (x . x); for x = [0, 1, 2, 3] this is 2 * 14 = 28.
y = 2 * x.dot(x)
y

tensor(28., grad_fn=<MulBackward0>)

In [5]:
# Backpropagate from the scalar y; autograd stores dy/dx in x.grad.
y.backward()
# Analytically d(2 * x.x)/dx = 4x, i.e. [0, 4, 8, 12] for x = [0, 1, 2, 3].
x.grad

tensor([ 0.,  4.,  8., 12.])

In [6]:
"""
When y is a vector, the most natural representation of the derivative of y 
with respect to a vector x is a matrix called the Jacobian that contains 
the partial derivatives of each component of y with respect to each component of x. 
"""

x.grad.zero_()
y = x * x
y.backward(gradient=torch.ones(len(y)))  # Faster: y.sum().backward()
x.grad

tensor([0., 2., 4., 6.])