In [1]:
# Automatic Differentiation with torch.autograd

import torch

# One-layer network: logits z = x @ w + b, scored against the target y.
x = torch.ones(5)   # input features
y = torch.zeros(3)  # target labels

# Trainable parameters; requires_grad=True asks autograd to track them.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)

z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)
loss


tensor(0.6668, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [2]:
# grad_fn is the backward-pass node autograd recorded for the operation that
# produced each tensor: the final `+ b` for z, and the loss function for loss.
print('Gradient function for z =', z.grad_fn)
print('Gradient function for loss =', loss.grad_fn)

Gradient function for z = <AddBackward0 object at 0x7f09442f5820>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7f09442f53d0>


In [3]:
# Compute the gradient of the loss with respect to w and b.
# backward() populates .grad on the tensors created with requires_grad=True.
loss.backward()
# x is all ones, so every row of w.grad is identical and equal to b.grad.
print('w.grad =', w.grad)
print('b.grad =', b.grad)

w.grad = tensor([[0.0010, 0.0223, 0.2849],
        [0.0010, 0.0223, 0.2849],
        [0.0010, 0.0223, 0.2849],
        [0.0010, 0.0223, 0.2849],
        [0.0010, 0.0223, 0.2849]])
b.grad = tensor([0.0010, 0.0223, 0.2849])


In [4]:
# Disabling Gradient Tracking

# Evaluated normally, z is built from w and b (both require grad), so it is tracked.
z = x @ w + b
print(z.requires_grad)

# The same expression inside no_grad() yields a tensor that does not require grad.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

True
False


In [5]:
# detach() achieves the same result: the detached tensor does not require grad.
z = x @ w + b
z_det = z.detach()
print(z_det.requires_grad)

# Reasons you might want to disable gradient tracking:
# - To mark some parameters as frozen. This is a common scenario when fine-tuning a pretrained network.
# - To speed up computation when you are only doing a forward pass, because computations on tensors that do not track gradients are more efficient.

False
