In [1]:
import torch
import numpy as np

In [9]:
# Scalar output: backward() needs no argument when the result is a single value.
x = torch.randn(3).requires_grad_()  # leaf tensor sampled from N(0, 1)
print(x)
y = 2 + x
print(y)
z = (2 * (y * y)).mean()  # reduce the per-element values to one scalar
print(z)
z.backward()  # populates x.grad with dz/dx
print(x.grad)

tensor([-0.1927, -0.1789,  0.5723], requires_grad=True)
tensor([1.8073, 1.8211, 2.5723], grad_fn=<AddBackward0>)
tensor(8.7997, grad_fn=<MeanBackward0>)
tensor([2.4097, 2.4282, 3.4297])


In [11]:
# Non-scalar output: backward() must be given a tensor of the same shape as z.
x = torch.randn((3,), requires_grad=True)  # leaf tensor sampled from N(0, 1)
print(x)
y = 2 + x
print(y)
z = 2 * (y * y)  # element-wise, so z keeps the shape of x
print(z)
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)  # x.grad receives the vector-Jacobian product of v with dz/dx
print(x.grad)

tensor([-0.0570, -0.2110, -0.2370], requires_grad=True)
tensor([1.9430, 1.7890, 1.7630], grad_fn=<AddBackward0>)
tensor([7.5505, 6.4012, 6.2161], grad_fn=<MulBackward0>)
tensor([7.7720e-01, 7.1561e+00, 7.0519e-03])


## Convert to requires_grad=False

In [12]:
# Option 1: switch gradient tracking off on the tensor itself.
x = torch.randn((3,), requires_grad=True)  # leaf tensor sampled from N(0, 1)
print(x)
x.requires_grad = False  # x is a leaf, so the flag can be flipped directly
print(x)

tensor([ 1.1004,  3.0825, -0.8442], requires_grad=True)
tensor([ 1.1004,  3.0825, -0.8442])


In [13]:
# Option 2: detach() returns a tensor with the same values that does not require grad.
x = torch.randn(3).requires_grad_()  # leaf tensor sampled from N(0, 1)
print(x)
y = x.detach()  # x itself still has requires_grad=True
print(y)

tensor([ 1.3086, -0.4258, -0.8347], requires_grad=True)
tensor([ 1.3086, -0.4258, -0.8347])


In [15]:
# Option 3: operations executed under torch.no_grad() are not recorded.
x = torch.randn((3,), requires_grad=True)  # leaf tensor sampled from N(0, 1)
print(x)
with torch.no_grad():
    y = 2 + x
print(y)  # printed without a grad_fn

tensor([-2.6077,  1.1148, -0.6651], requires_grad=True)
tensor([-0.6077,  3.1148,  1.3349])


In [19]:
### fake training session
# Case 1: .grad is never reset, so each backward() adds onto the previous value.
weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_output = (3 * weights).sum()
    model_output.backward()
    print(weights.grad)

print()

# Case 2: .grad is zeroed in place after every pass, so each epoch starts clean.
weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_output = (3 * weights).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [None]:
### clear optimizer gradient as well
# An optimizer exposes zero_grad() to reset the gradients of every parameter it manages.
weights = torch.ones(4, requires_grad=True)
# The first argument must be an iterable of tensors (or param-group dicts);
# passing the bare tensor raises a TypeError, so wrap it in a list.
optimizer = torch.optim.SGD([weights], lr=0.01)
for epoch in range(3):
    model_output = (weights * 3).sum()
    model_output.backward()  # populate weights.grad so step() has something to apply
    optimizer.step()         # plain SGD: weights <- weights - lr * weights.grad
    optimizer.zero_grad()    # reset before the next pass so gradients do not accumulate