- Function
    - Knows how to compute the function in the forward direction
    - Knows how to compute its derivative during back propagation
    - A reference to it is stored in the `grad_fn` property of a tensor

In [2]:
import torch

# One input sample with 5 features and its 3-element target label.
x = torch.ones(5)
y = torch.zeros(3)

# Parameters of a single linear layer mapping 5 inputs to 3 outputs.
# requires_grad=True asks autograd to track operations on them.
w = torch.randn(5, 3, requires_grad=True)  # Weight matrix, shape (5, 3)
print(w)
b = torch.randn(3, requires_grad=True)  # Bias vector, one entry per output
print(b)

# Forward pass: logits are the input times the weights, plus the bias.
z = x @ w + b
print(z, end="\n\n")

# Loss compares the logits with the label; it is also part of the computational graph.
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)
print(loss, end="\n\n")

# Each tensor produced by a tracked operation references its backward Function in grad_fn.
print(
    f"Gradient function for z = {z.grad_fn}",
    f"Gradient function for loss = {loss.grad_fn}",
    sep="\n",
)

tensor([[ 0.8271, -0.0532, -0.9936],
        [ 0.0945, -2.5440,  0.5341],
        [-2.2361, -0.3933, -0.5201],
        [-1.0465, -0.8172, -1.2280],
        [-1.2697,  0.7840, -1.3365]], requires_grad=True)
tensor([ 1.0290, -1.8581,  0.3936], requires_grad=True)
tensor([-2.6016, -4.8817, -3.1505], grad_fn=<AddBackward0>)

tensor(0.0403, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

Gradient function for z = <AddBackward0 object at 0x000002358A372770>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x000002358B489A20>


- Calculate the derivatives of the loss function w.r.t. the parameters: $\frac{\partial \text{loss}}{\partial w}$ and $\frac{\partial \text{loss}}{\partial b}$

In [3]:
# Back-propagate from the scalar loss; autograd fills in .grad on w and b.
loss.backward()

# Show d(loss)/dw followed by d(loss)/db.
print(w.grad, b.grad, sep="\n")

tensor([[0.0230, 0.0025, 0.0137],
        [0.0230, 0.0025, 0.0137],
        [0.0230, 0.0025, 0.0137],
        [0.0230, 0.0025, 0.0137],
        [0.0230, 0.0025, 0.0137]])
tensor([0.0230, 0.0025, 0.0137])
