In [None]:
import torch

In [37]:
# inputs
x = torch.ones(5)
# output
y = torch.zeros(3)

# weights
w = torch.rand(5, 3, requires_grad=True)

# bias
b = torch.rand(3, requires_grad=True)

print("inputs =" , x)
print("output =", y)
print("weight =", w)
print("bias =", b)


inputs = tensor([1., 1., 1., 1., 1.])
output = tensor([0., 0., 0.])
weight = tensor([[0.6337, 0.3109, 0.4552],
        [0.0647, 0.3590, 0.9581],
        [0.4798, 0.6914, 0.1591],
        [0.9993, 0.0491, 0.4362],
        [0.4417, 0.4780, 0.4679]], requires_grad=True)
bias = tensor([0.4493, 0.2337, 0.3934], requires_grad=True)


In [None]:
# find z 
z = torch.matmul(x, w) + b
print(z)

tensor([3.0685, 2.1221, 2.8699], grad_fn=<AddBackward0>)


In [39]:
# apply activation function
y_pred = torch.sigmoid(z)
print(y_pred)

tensor([0.9556, 0.8930, 0.9463], grad_fn=<SigmoidBackward0>)


In [40]:
# find loss
loss = torch.nn.functional.binary_cross_entropy_with_logits(y, y_pred)
print(loss)

tensor(0.6931, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)


In [41]:
print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

Gradient function for z = <AddBackward0 object at 0x1115f4760>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x1113ef430>


In [42]:
# Apply backward
loss.backward()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([-0., -0., -0.])


**We can only obtain the grad properties for the leaf nodes of the computational graph, which have requires_grad property set to True. For all other nodes in our graph, gradients will not be available.**

**We can only perform gradient calculations using backward once on a given graph, for performance reasons. If we need to do several backward calls on the same graph, we need to pass retain_graph=True to the backward call.**

# Disabling Gradient Tracking

By default, all tensors with requires_grad=True are tracking their computational history and support gradient computation. However, there are some cases when we do not need to do that, for example, when we have trained the model and just want to apply it to some input data, i.e. we only want to do forward computations through the network. We can stop tracking computations by surrounding our computation code with torch.no_grad() block:

In [44]:
z = torch.matmul(x,w) + b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x,w)+ b
print(z.requires_grad)

True
False


**Another way to achieve the same result is to use the detach() method on the tensor:**

In [45]:
z = torch.matmul(x,w)+b
z_det = z.detach()
print(z_det.requires_grad)

False


# There are reasons you might want to disable gradient tracking:
*To mark some parameters in your neural network as frozen parameters.*

*To speed up computations when you are only doing forward pass, because computations on tensors that do not track gradients would be more efficient.*