In [2]:
import torch
import numpy as np

In [5]:
# Tell pytorch it will need to calculate the gradients for the tensor later
# This is necessary for variable optimization
x = torch.randn(3, requires_grad=True)
print(x)

tensor([ 2.5124, -0.3231, -1.8432], requires_grad=True)


In [14]:
# Creates a computational graph - in this case a '+' node with inputs x and 2, and output y
y = x + 2
print(y)

tensor([4.5124, 1.6769, 0.1568], grad_fn=<AddBackward0>)


In [15]:
# Forward pass calculates the output and stores the function which is used in the backprop to calculate the gradient
z = y * y * 2
print(z)

z = z.mean()
print(z)

z.backward() # calculate the gradient of z wrt x: dz/dx
print(x.grad)

tensor([40.7239,  5.6242,  0.0492], grad_fn=<MulBackward0>)
tensor(15.4658, grad_fn=<MeanBackward0>)
tensor([18.0497,  6.7077,  0.6272])


In [18]:
z = y * y * 2
print(z)

# z.backward() # grad can be implicitly created only for scalar outputs

# so we have to provide a vector of the same shape, as it is a vector jacobian product
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(v)

print(x.grad)

tensor([40.7239,  5.6242,  0.0492], grad_fn=<MulBackward0>)
tensor([21.6596, 20.1231,  0.6285])


In [30]:
x = torch.randn(3, requires_grad=True)
print(x)

# Prevent the tracking of grads
y = x.detach()
print(y)

x.requires_grad_(False)
print(x)

x = torch.randn(3, requires_grad=True)

with torch.no_grad():
    y = x + 2
    print(y)

tensor([ 0.2911, -0.2769, -0.3694], requires_grad=True)
tensor([ 0.2911, -0.2769, -0.3694])
tensor([ 0.2911, -0.2769, -0.3694])
tensor([2.0294, 1.1699, 2.9281])


In [35]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(1):
    model_output = (weights * 3).sum()

    model_output.backward()

    print(weights.grad)

weights = torch.ones(4, requires_grad=True)

print()

# The gradient values accumulate
for epoch in range(3):
    model_output = (weights * 3).sum()

    model_output.backward()

    print(weights.grad)

print()


weights = torch.ones(4, requires_grad=True)

# To stop the gradients accumulating we need to zero them each epoch
for epoch in range(3):
    model_output = (weights * 3).sum()

    model_output.backward()

    print(weights.grad)

    weights.grad.zero_()

tensor([3., 3., 3., 3.])

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [37]:
# Zeroing grads IRL
# optimizer = torch.optim.SGD(weights, lr=0.01)
# optimizer.step()
# optimizer.zero_grad()