In [1]:
import torch

In [2]:
# Toy one-layer setup: a 5-element input, a 3-element target, and trainable
# parameters W (5x3) and B (3). W and B are drawn from torch.rand and no seed
# is set in this cell, so the printed loss differs from run to run.
Y = torch.zeros(3)
X = torch.ones(5)
W = torch.rand(5, 3, requires_grad=True)
B = torch.rand(3, requires_grad=True)

# Forward pass: Z has 3 entries and is passed to the loss as raw logits.
Z = W.T @ X + B
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=Z, target=Y)
print(f"Loss = {loss}")

Loss = 3.6166486740112305


**Compute gradient**

In [4]:
# Backpropagate from the scalar loss, then show the gradients that autograd
# stored on the two parameters (W first, then B, one per line).
loss.backward()
print(W.grad, B.grad, sep="\n")

tensor([[0.3273, 0.3236, 0.3212],
        [0.3273, 0.3236, 0.3212],
        [0.3273, 0.3236, 0.3212],
        [0.3273, 0.3236, 0.3212],
        [0.3273, 0.3236, 0.3212]])
tensor([0.3273, 0.3236, 0.3212])


**Disable gradient tracking**

In [5]:
# Computed normally: the result is tracked by autograd.
Z = W.T @ X + B
print(Z.requires_grad)

# Same expression evaluated under torch.no_grad(): tracking is switched off
# for everything computed inside the block.
with torch.no_grad():
    Z = W.T @ X + B
print(Z.requires_grad)

True
False


In [7]:
# Alternative to no_grad: compute with tracking on, then take a detached
# view of the result that no longer requires grad.
Z = W.T @ X + B
Z_new = Z.detach()
print(Z_new.requires_grad)

False


In [16]:
# Demonstrates that repeated backward() calls accumulate into .grad:
# the second call adds to the first, and zeroing .grad before the third
# call brings the result back to a single call's gradient.
inp = torch.eye(5, requires_grad=True)
out = (inp + 1) ** 2

# Each entry: (label to print, whether to clear inp.grad before the call).
for label, reset_first in (
    ("First call:", False),
    ("Second call:", False),
    ("Third call:", True),
):
    if reset_first:
        inp.grad.zero_()
    # out is not a scalar, so an explicit upstream gradient is supplied;
    # retain_graph=True keeps the graph alive for the following calls.
    out.backward(gradient=torch.ones_like(inp), retain_graph=True)
    print("--------------------------------------------------")
    print(label, inp.grad)
    print("Inp:", inp)
    if label != "Third call:":
        print()

--------------------------------------------------
First call: tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
Inp: tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]], requires_grad=True)
--------------------------------------------------
Second call: tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.],
        [4., 4., 4., 4., 8.]])
Inp: tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]], requires_grad=True)
--------------------------------------------------
Third call: tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.],
        [2., 2., 2., 2., 4.]])
Inp: 