In [1]:
#define differention
def dy_dx_x2(x):
    return 2*x

print(dy_dx_x2(3.0))  # expect 6.0
print(dy_dx_x2(5.0))  # expect 10.0

6.0
10.0


In [5]:
#we will use autograd to compute the derivative and store it in a tensor
import torch
X=torch.tensor(3.0, requires_grad=True) # we did require_grad=True to track the computation
Y=X**4
print(Y,X)

Y.backward() # this will compute the derivative
X.grad



tensor(81., grad_fn=<PowBackward0>) tensor(3., requires_grad=True)


tensor(108.)

In [6]:
#simple eg of forward pass then backward pass to compute gradients
import torch
x=torch.tensor(2.0, requires_grad=True)
y= x**3 + 3*x**2 + 2*x + 1
print(y)
y.backward()  # compute gradients
print(x.grad)  # dy/dx at x=2.0

tensor(25., grad_fn=<AddBackward0>)
tensor(26.)


In [12]:
#but changing tensor values keep accumulating gradients
x_=torch.tensor(2.0, requires_grad=True)
y= x_**3 + 3*x_**2 + 2*x_ + 1
print(y)
y.backward()  # compute gradients
print(x_.grad)  # dy/dx at x=2.0
y= x_**2
y.backward()
print(x_.grad)  # dy/dx at x=2.0

#that's why we need to zero the gradients after every step
x_=torch.tensor(2.0, requires_grad=True)
y= x_**3 + 3*x_**2 + 2*x_ + 1
print(y)
y.backward()  # compute gradients
print(x_.grad)  # dy/dx at x=2.0
x_.grad.zero_()  # zero the gradients
y= x_**2
y.backward()
print(x_.grad)  # dy/dx at x=2.0

tensor(25., grad_fn=<AddBackward0>)
tensor(26.)
tensor(30.)
tensor(25., grad_fn=<AddBackward0>)
tensor(26.)
tensor(4.)


In [14]:
# Manually its hard and if more nested functions are there it gets messier
x=torch.tensor(2.0, requires_grad=True)
y= x**3 + 3*x**2 + 2*x + 1
print(y)
z= torch.sin(y) + torch.log(y)
print(z)
dz_dy= torch.cos(y) + 1/y
dy_dx= 3*x**2 + 6*x + 2
dz_dx= dz_dy * dy_dx
print(dz_dx)  # dz/dx at x=2.0
x.grad=None  # zero the gradients

print("----using autograd----" )
# autograd does this for us
x=torch.tensor(2.0, requires_grad=True)
y= x**3 + 3*x**2 + 2*x + 1
print(y)
z= torch.sin(y) + torch.log(y)
print(z)
z.backward()  # compute gradients
print(x.grad)  # dz/dx at x=2.0
x.grad=None  # zero the gradients

tensor(25., grad_fn=<AddBackward0>)
tensor(3.0865, grad_fn=<AddBackward0>)
tensor(26.8113, grad_fn=<MulBackward0>)
----using autograd----
tensor(25., grad_fn=<AddBackward0>)
tensor(3.0865, grad_fn=<AddBackward0>)
tensor(26.8113)


In [None]:
#Detach: to stop tracking history
# three ways to detach
#approach 1: using detach(), approach 2: using with torch.no_grad() context, approach 3: by setting requires_grad=False
xa=torch.tensor(2.0, requires_grad=True)
y= xa**3 + 3*xa**2 + 2*xa + 1
print(y)
#approach 1
y_detached= y.detach()
z= torch.sin(y_detached) + torch.log(y_detached)
print(z)
z.backward()  # compute gradients

#approach 2
xb=torch.tensor(2.0, requires_grad=True)
yb= xb**3 + 3*xb**2 + 2*xb + 1
print(yb)
with torch.no_grad():
    yb_detached= yb
    zb= torch.sin(yb_detached) + torch.log(yb_detached)
    print(zb)
zb.backward()  # compute gradients
print(xb.grad)  # dz/dx at x=2.0

#approach 3
xc=torch.tensor(2.0, requires_grad=True)
yc= xc**3 + 3*xc**2 + 2*xc + 1
print(yc)
yc.requires_grad_(False)  # detach by setting requires_grad=False
zc= torch.sin(yc) + torch.log(yc)
print(zc)
zc.backward()  # compute gradients  
print(xc.grad)  # dz/dx at x=2.0

#ALL will give same result that Z will not track gradients and give error if we try to call backward on it

tensor(25., grad_fn=<AddBackward0>)
tensor(3.0865)


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn