# Jacobianâ€“Vector Product (JVP)

*Normally, .backward() works when the output is a scalar loss*<br/><br/>

*Here, output is a matrix, not a single number*<br/><br/>

*So PyTorch cannot compute a full Jacobian matrix*<br/><br/>

*Instead, PyTorch computes a Jacobianâ€“Vector Product (JVP)*<br/><br/>

In [67]:
import torch
inp = torch.eye(4, 5, requires_grad=True)
print(inp)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.]], requires_grad=True)


*Step 1: inp + 1 --- Every element increases by 1* <br/>
*Step 2: .pow(2) --- Square every element*<br/>
*Step 3: .t() --- Transpose the matrix* <br/>
*Original shape: (4 Ã— 5) --- After transpose: (5 Ã— 4)* <br/>
*ðŸ“Œ Important: out is now a matrix, not a scalar*

In [68]:
# create a output tensor
out = (inp+1).pow(2).t()
print(out)

tensor([[4., 1., 1., 1.],
        [1., 4., 1., 1.],
        [1., 1., 4., 1.],
        [1., 1., 1., 4.],
        [1., 1., 1., 1.]], grad_fn=<TBackward0>)


# First backward call (Jacobianâ€“vector product)

In [69]:
out.backward(torch.ones_like(out), retain_graph=True)

*Why do we pass torch.ones_like(out)?*<br/>
*Because: -- out is NOT a scalar, PyTorch needs a vector v*

In [70]:
inp.grad

tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])

In [71]:
print(f"First call\n{inp.grad}")


First call
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])


In [72]:
out.backward(torch.ones_like(out), retain_graph=True)
print(f"\nSecond call\n{inp.grad}")



Second call
tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.]])


In [73]:
inp.grad.zero_()


tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [74]:
input = torch.tensor([2.0, 3.0])
# print(input)
y = torch.tensor(1.0)
w = torch.tensor([1.0, 0.1], requires_grad=True)
b = torch.tensor(0.3, requires_grad=True)
z = torch.dot(input, w) + b
y_pred = torch.relu(z)
loss = torch.nn.functional.binary_cross_entropy_with_logits(y, y_pred)
loss.backward()
print(input.grad)

None


In [75]:
w = torch.rand()

TypeError: rand() received an invalid combination of arguments - got (), but expected one of:
 * (tuple of ints size, *, torch.Generator generator, tuple of names names, torch.dtype dtype = None, torch.layout layout = None, torch.device device = None, bool pin_memory = False, bool requires_grad = False)
 * (tuple of ints size, *, torch.Generator generator, Tensor out = None, torch.dtype dtype = None, torch.layout layout = None, torch.device device = None, bool pin_memory = False, bool requires_grad = False)
 * (tuple of ints size, *, Tensor out = None, torch.dtype dtype = None, torch.layout layout = None, torch.device device = None, bool pin_memory = False, bool requires_grad = False)
 * (tuple of ints size, *, tuple of names names, torch.dtype dtype = None, torch.layout layout = None, torch.device device = None, bool pin_memory = False, bool requires_grad = False)


In [None]:
x = torch.ones([5,5])
print(x)
w = torch.rand(5, 6, requires_grad=True)
print(w)
b = torch.zeros( 6, requires_grad=True)
print(b)

z = x @ w+ b
print(z)

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])
tensor([[0.2414, 0.0740, 0.8841, 0.4452, 0.9664, 0.9470],
        [0.8741, 0.3764, 0.0526, 0.2089, 0.8558, 0.6689],
        [0.7160, 0.8107, 0.6163, 0.7649, 0.6899, 0.0584],
        [0.4551, 0.3484, 0.8497, 0.9476, 0.0980, 0.0871],
        [0.1794, 0.4035, 0.9815, 0.8886, 0.2430, 0.0722]], requires_grad=True)
tensor([0., 0., 0., 0., 0., 0.], requires_grad=True)
tensor([[2.4660, 2.0130, 3.3843, 3.2552, 2.8530, 1.8336],
        [2.4660, 2.0130, 3.3843, 3.2552, 2.8530, 1.8336],
        [2.4660, 2.0130, 3.3843, 3.2552, 2.8530, 1.8336],
        [2.4660, 2.0130, 3.3843, 3.2552, 2.8530, 1.8336],
        [2.4660, 2.0130, 3.3843, 3.2552, 2.8530, 1.8336]],
       grad_fn=<AddBackward0>)


# Example 3

In [None]:
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = x**2
z = y.mean() # convert single value
print(z)
z.backward()
print("gradients", x.grad)
x.grad.zero_()

tensor(4.6667, grad_fn=<MeanBackward0>)
gradients tensor([0.6667, 1.3333, 2.0000])


tensor([0., 0., 0.])

# Detach the gradient


In [None]:
x = torch.tensor([1.8, 0.5, 0.9, 0.2], requires_grad=True)
y = (x ** 2).mean()
print("x value with grad -" , x)

y.backward()

z = x.detach()
print("z value after detach - separated from the computational graph - ", z)


x value with grad - tensor([1.8000, 0.5000, 0.9000, 0.2000], requires_grad=True)
z value after detach - separated from the computational graph -  tensor([1.8000, 0.5000, 0.9000, 0.2000])
