In [2]:
import torch

In [12]:
# Toy one-layer model: z = x @ w + b.
# w and b are the parameters to optimize, so they are created with
# requires_grad=True; autograd can then compute the gradient of the loss
# with respect to them.

x = torch.ones(5)   # input tensor
y = torch.zeros(3)  # expected output

# The weights are drawn before the bias, then both are shown.
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
print('w: ', w)
print('b: ', b)

# Forward pass: (5,) @ (5, 3) -> (3,), then add the bias.
z = x @ w + b
print('z: ', z)

# Measure how far the prediction z (raw logits) is from the expected result y
# with binary cross entropy; the "with_logits" variant applies the sigmoid
# inside the loss function.
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)

w:  tensor([[ 1.0671,  0.4779,  1.5446],
        [ 0.0640, -0.6595, -0.2024],
        [-0.5631, -0.3336, -1.0392],
        [-0.6145,  1.1067,  2.3310],
        [-1.2196, -1.6501,  0.7928]], requires_grad=True)
b:  tensor([-0.4092,  0.9760, -0.6199], requires_grad=True)
z:  tensor([-1.6753, -0.0827,  2.8069], grad_fn=<AddBackward0>)


In [6]:
# Dot product of two 1-D tensors returns a 0-dim tensor.
# x has 5 elements while y has 3, so torch.matmul(x, y) raises a size-mismatch
# RuntimeError when the notebook is run top to bottom. Pair x with a zero
# vector of its own length instead, which still evaluates to tensor(0.).
torch.matmul(x, torch.zeros_like(x))

tensor(0.)

In [15]:
# Show the scalar loss. Its grad_fn records that it was produced by
# binary_cross_entropy_with_logits, which is what lets backward() start here.
print(loss)

tensor(1.2299, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)


In [13]:
# Every tensor produced by a tracked operation keeps a reference to the
# backward-propagation function that created it in its grad_fn property.
z_backward_fn = z.grad_fn
loss_backward_fn = loss.grad_fn

# The label strings are kept exactly as they appear in the recorded output.
print('Gradient fucntion for z = ', z_backward_fn)
print('Gradient function for loss = ', loss_backward_fn)

Gradient fucntion for z =  <AddBackward0 object at 0x7f43a9e0db10>
Gradient function for loss =  <BinaryCrossEntropyWithLogitsBackward0 object at 0x7f43a9e0d090>


In [None]:
# Back-propagate from the scalar loss. Autograd stores d(loss)/dw and
# d(loss)/db on the leaf tensors that were created with requires_grad=True.
# backward() can only be called once on a given graph unless
# retain_graph=True is passed, so rebuild `loss` before re-running this cell.
loss.backward()

# Show the accumulated gradients: w.grad first, then b.grad on its own line.
print(w.grad, b.grad, sep='\n')

In [None]:
'''
Notes:
By default, all tensors with requires_grad=True
track their computational history and
support gradient computation.
'''

# Disable Gradient Tracking

In [None]:
# Two ways to stop autograd from tracking a computation.

# Baseline: w and b require grad, so a result computed from them does too.
z = torch.matmul(x, w) + b
print(z.requires_grad)

# Method 1: operations run inside torch.no_grad() are not recorded.
with torch.no_grad():
    z = torch.matmul(x, w) + b
print(z.requires_grad)

# Method 2: detach() returns a tensor that is cut off from the graph.
# Detach a freshly tracked result rather than the no_grad `z` above, which
# already has requires_grad=False and would make the demonstration a no-op.
z_tracked = torch.matmul(x, w) + b
z_det = z_tracked.detach()
print(z_det.requires_grad)

# Result:
# True
# False
# False

In [None]:
'''
Reasons you might want to disable gradient tracking:
1. To mark some parameters in your neural network as frozen. This is a very common scenario when fine-tuning a pretrained network.
2. To speed up computations when you are only doing a forward pass, because computations on tensors that do not track gradients are more efficient.
'''