In [None]:
import re
import torch


# Draw three samples from a standard normal distribution. With
# requires_grad=True, autograd records every operation applied to x so that
# gradients with respect to x can be computed later.
x = torch.randn(3, requires_grad=True)
print(x)

# y is the result of a tracked operation, so it carries a grad_fn
# (AddBackward0) that links it back to x in the computation graph.
y = x + 2
print(y)

# Reduce to a scalar: z = mean(3 * y^2). Because z is a scalar, backward()
# can be called without passing an explicit gradient argument.
z = (y * y * 3).mean()
print(z)

# Backpropagation: autograd walks the recorded graph from z back to x,
# applying the chain rule as a vector-Jacobian product, and accumulates
# dz/dx into x.grad.
z.backward()

# x.grad has the same shape as x. For this graph dz/dx = 2 * (x + 2).
print(x.grad)

tensor([ 0.1643, -0.1414, -0.1182], requires_grad=True)
tensor([2.1643, 1.8586, 1.8818], grad_fn=<AddBackward0>)
tensor(11.6799, grad_fn=<MeanBackward0>)
tensor([4.3286, 3.7173, 3.7636])


In [None]:
# The gradient is a tensor of the same shape as x; it holds the partial derivative of z with respect to each element of x.

# Prevent autograd from tracking history.
# This is useful when gradients are not needed, for example during inference or evaluation.

# Common ways to do this:
# 1. Use the torch.no_grad() context manager.
# Inside the block gradient recording is disabled: the result of every operation has requires_grad=False,
# even when its inputs require gradients. The requires_grad flags of existing tensors (such as x) are not changed.
with torch.no_grad():
    y = x + 2
    print(y)

# 2. Use the detach() method.
# detach() returns a new tensor that shares its data with the original but is cut off from the computation graph
# and does not require gradients, so operations on the detached tensor are not tracked.

# 3. Alternatively, call x.requires_grad_(False) (note the spelling: requires_grad_, with a trailing underscore).
# Unlike detach(), this works in place: it switches tracking off on x itself instead of creating a new tensor.
# It can only be used on leaf tensors such as x, not on results of tracked operations.

y = x.detach()
print(y)

tensor([2.1643, 1.8586, 1.8818])
tensor([ 0.1643, -0.1414, -0.1182])
