In [1]:
import torch

In [11]:
# Draw three standard-normal samples as a leaf tensor that autograd will track.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([-0.1730,  0.1179,  1.4191], requires_grad=True)


In [12]:
# Element-wise add; because x is tracked, the result records an Add node as its grad_fn.
y = torch.add(x, 2)
print(y)

tensor([1.8270, 2.1179, 3.4191], grad_fn=<AddBackward0>)


In [13]:
# Reduce 2*y^2 to a single scalar (its mean) so backward() can later be called without arguments.
z = (y * y * 2).mean()
print(z)

tensor(13.0092, grad_fn=<MeanBackward0>)


In [14]:
# z is a scalar here, so backward() takes no argument; it populates x.grad with dz/dx.
z.backward()
print(x.grad)

tensor([2.4359, 2.8239, 4.5589])


`requires_grad=True` must be set on a tensor for autograd to compute gradients with respect to it.

When the output is not a scalar, `backward()` must be given a vector with the same shape as the output to backpropagate.

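Under the hood this is a vector–Jacobian product: calling `z.backward(v)` stores $J^\top v$ in `x.grad`, where $J = \partial z / \partial x$.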

In [16]:
# Backward through a non-scalar output: z has three elements, so backward()
# needs a same-shaped vector v to weight each element's contribution.
x = torch.randn((3,), requires_grad=True)
print(x)

y = torch.add(x, 2)
print(y)

z = (y * y) * 2
print(z)

v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)

# Each entry of x.grad is v[i] * dz[i]/dx[i] = v[i] * 4 * y[i].
print(x.grad)

tensor([-2.7513, -0.6729, -0.6430], requires_grad=True)
tensor([-0.7513,  1.3271,  1.3570], grad_fn=<AddBackward0>)
tensor([1.1289, 3.5226, 3.6830], grad_fn=<MulBackward0>)
tensor([-0.3005,  5.3085,  0.0054])


A trailing underscore (`_`) on a PyTorch method name means the operation is performed in place: it modifies the tensor it is called on rather than creating a new one.

In [18]:
# Three ways to keep autograd from tracking operations on a tensor.
x = torch.randn((3,), requires_grad=True)
print(x)

# 1) Switch the flag off in place (note the trailing underscore).
x.requires_grad_(requires_grad=False)
print(x)

# 2) detach() hands back a tensor that is not attached to the graph.
# NOTE: x already stopped requiring grad in step 1, so steps 2 and 3 would
# print untracked tensors here even without detach() / no_grad().
y = x.detach()
print(y)

# 3) Operations executed inside no_grad() are not recorded.
with torch.no_grad():
    y = torch.add(x, 2)
    print(y)

# x itself keeps its original values.
print(x)

tensor([0.9092, 0.3290, 1.1127], requires_grad=True)
tensor([0.9092, 0.3290, 1.1127])
tensor([0.9092, 0.3290, 1.1127])
tensor([2.9092, 2.3290, 3.1127])
tensor([0.9092, 0.3290, 1.1127])


LET'S TRAIN A DUMMY EXAMPLE

In [24]:
# Gradients are never cleared in this loop, so every backward() call adds
# another 3 to each entry of weights.grad (3, 6, 9, ...).
weights = torch.ones((4,), requires_grad=True)
for epoch in range(5):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])
tensor([12., 12., 12., 12.])
tensor([15., 15., 15., 15.])


In [27]:
# Same loop as before, but the gradient buffer is reset after every step,
# so each iteration reports only that step's gradient (always 3).
weights = torch.ones((4,), requires_grad=True)
for epoch in range(5):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

    # Clear the accumulated gradient in place before the next iteration.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
