In [1]:
import torch

In [2]:
# Random leaf tensor with three values; turning requires_grad on makes autograd
# record every operation that is later applied to it.
x = torch.rand(3).requires_grad_()
print(x)

tensor([0.1579, 0.2922, 0.6932], requires_grad=True)


In [3]:
# Adding a constant yields a non-leaf tensor whose grad_fn (AddBackward0)
# links it back to x in the computational graph.
y = torch.add(x, 2)
print(y)

tensor([2.1579, 2.2922, 2.6932], grad_fn=<AddBackward0>)


In [4]:
# Element-wise product of y with itself; the resulting graph node is MulBackward0.
z = torch.mul(y, y)
print(z)

tensor([4.6564, 5.2544, 7.2534], grad_fn=<MulBackward0>)


In [5]:
# Collapse z to a scalar: mean = (z_1 + z_2 + z_3) / 3 over its three elements.
z = torch.mean(z)
# A scalar output needs no argument to backward(); this fills x.grad with
# d(mean)/dx_i = 2 * (x_i + 2) / 3.
z.backward()
print(x.grad)

tensor([1.4386, 1.5282, 1.7955])


In [7]:
# backward() without arguments only works for scalar outputs; for a
# vector-valued output a vector must be passed so that autograd can form a
# vector-Jacobian product.
p = torch.rand(3).requires_grad_()
print(p)

tensor([0.5547, 0.0879, 0.2193], requires_grad=True)


In [8]:
# q is vector-valued, so backward() is handed a vector v of the same shape;
# autograd stores the vector-Jacobian product (here 2 * p * v) in p.grad.
q = torch.mul(p, p)
print(q)
v = torch.tensor([0.1, 1, 0.001], dtype=torch.float32)
q.backward(gradient=v)
print(p.grad)

tensor([0.3077, 0.0077, 0.0481], grad_fn=<MulBackward0>)
tensor([0.1109, 0.1759, 0.0004])


In [10]:
# The gradient of q with respect to p (weighted by v) is now stored in p.grad.
# To stop autograd from tracking further operations on p, the first option is
# to switch its requires_grad flag off in place.
p.requires_grad = False
print(p)

tensor([0.5547, 0.0879, 0.2193])


In [11]:
# Second option: detach() returns a tensor that is cut off from the
# computational graph, so no gradient history is attached to it.
p = p.detach()
print(p)

tensor([0.5547, 0.0879, 0.2193])


In [12]:
# Third option: operations executed inside torch.no_grad() are not recorded,
# so y is produced without a grad_fn even though x still requires grad.
with torch.no_grad():
    y = torch.add(x, 2)
print(y)

tensor([2.1579, 2.2922, 2.6932])


In [13]:
# NOTE(review): `p = p` performs no tensor operation, so torch.no_grad() has
# nothing to suppress here. p prints without requires_grad because of the
# earlier requires_grad_(False) / detach() cells, not because of this block.
with torch.no_grad():
    p = p
print(p)

tensor([0.5547, 0.0879, 0.2193])


In [14]:
# So these were the three ways in which we can stop PyTorch from creating gradient functions and tracking the history of our computational graph: requires_grad_(False), detach(), and the torch.no_grad() context manager.


In [26]:
# Baseline: the forward pass is repeated three times, but backward() runs only
# once after the loop, so weights.grad holds a single gradient (3 per weight).
weights = torch.ones(4, requires_grad=True)
print(weights)
for epoch in range(3):
    model_output = torch.sum(weights * 3)

model_output.backward()
print(weights.grad)

tensor([1., 1., 1., 1.], requires_grad=True)
tensor([3., 3., 3., 3.])


In [24]:
# Because backward() is called inside the loop, each new gradient is added to
# the value already stored in weights.grad: the printout grows 3 -> 6 -> 9.
weights = torch.ones(4, requires_grad=True)
print(weights)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([1., 1., 1., 1.], requires_grad=True)
tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [28]:
# Resetting weights.grad to zero at the end of every iteration prevents the
# accumulation, so each pass prints the fresh gradient (3 per weight).
weights = torch.ones(4, requires_grad=True)
print(weights)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()

tensor([1., 1., 1., 1.], requires_grad=True)
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [None]:
# torch.optim.SGD expects an iterable of parameters (or parameter-group dicts),
# so the single weights tensor is wrapped in a list; passing the bare tensor
# raises a TypeError.
optimizer = torch.optim.SGD([weights], lr=0.01)
