In [1]:
import torch

## Derivatives using Autograd

In [6]:
# Plain-Python baseline: evaluate y = x^3 at x = 2 using an ordinary float (no autograd involved).
x = 2.0
y = x**3

def dydx(x):
  """Return the hand-derived derivative of y = x**3, i.e. 3*x**2, evaluated at x."""
  slope = 3 * x**2
  return slope

# Evaluate the hand-written derivative at x = 2; this is the reference value for the autograd result.
dydx(x)

12.0

In [44]:
# requires_grad=True tells autograd to record the operations applied to x so that
# gradients with respect to x can be computed later. x is created from a float
# literal because only floating-point (or complex) tensors can require gradients.
x = torch.tensor(2.0,requires_grad=True)
x

tensor(2., requires_grad=True)

In [45]:
# Forward pass: y is computed from a tracked tensor, so the result carries a grad_fn
# describing the operation that produced it.
y = x**3
y

tensor(8., grad_fn=<PowBackward0>)

In [46]:
# backward() differentiates y through the recorded graph and stores dy/dx in x.grad,
# so no hand-written derivative function is required.
y.backward()
x.grad

tensor(12.)

In [62]:
# Chain of two operations starting from a fresh tracked x: y = x^3, then z = sin(y).
x = torch.tensor(2.0,requires_grad=True)
y = x**3
z = torch.sin(y)
z

tensor(0.9894, grad_fn=<SinBackward0>)

In [63]:
# x is the leaf tensor created directly above; it has requires_grad=True but no grad_fn.
x

tensor(2., requires_grad=True)

In [64]:
# y is an intermediate result; its grad_fn records the power operation that produced it.
y

tensor(8., grad_fn=<PowBackward0>)

In [65]:
# backward() applies the chain rule from z back to x (through sin and the cube).
# Only leaf tensors such as x get .grad populated; the intermediate y does not keep
# one unless retain_grad() is called on it. Calling backward() a second time on this
# same graph raises an error unless retain_graph=True was passed the first time, and
# gradients from repeated backward passes are added into x.grad rather than replacing it.
z.backward()
x.grad

tensor(-1.7460)

### Single sigmoid neuron

In [82]:
# One training example: input feature x and its binary target y (plain tensors, not tracked).
x = torch.tensor(6.7)
y = torch.tensor(0.0)

# Parameters of the neuron; requires_grad=True so the loss can be differentiated w.r.t. them.
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)

In [83]:
# Pre-activation (logit) of the neuron: weighted input plus bias.
z = w*x+b
z

tensor(6.7000, grad_fn=<AddBackward0>)

In [84]:
# Sigmoid maps the logit into (0, 1) so it can be read as a predicted probability.
ypred = torch.sigmoid(z)
ypred

tensor(0.9988, grad_fn=<SigmoidBackward0>)

In [85]:
# Binary cross-entropy for a single example, written as its two terms:
#   y * log(ypred)            -> active when the target is 1
#   (1 - y) * log(1 - ypred)  -> active when the target is 0
positive_term = y * torch.log(ypred)
negative_term = (1 - y) * torch.log(1 - ypred)
L = -(positive_term + negative_term)
L

tensor(6.7012, grad_fn=<NegBackward0>)

In [86]:
# Backpropagate the loss, then print dL/dw and dL/db, which autograd stored on the leaf parameters.
L.backward()
print(w.grad)
print(b.grad)

tensor(6.6918)
tensor(0.9988)


In [95]:
# Same neuron, now with a batch of three examples: x holds the inputs, y the matching binary targets.
x = torch.tensor([6.7,8.1,9.0])
y = torch.tensor([0.0,1.0,1.0])

# Parameters are re-created so their .grad starts empty instead of carrying over
# the gradients accumulated by the single-example run.
w = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(0.0, requires_grad=True)

In [96]:
# The scalar w and b broadcast across the batch, giving one logit per example.
z = w*x+b
z

tensor([6.7000, 8.1000, 9.0000], grad_fn=<AddBackward0>)

In [97]:
# Element-wise sigmoid: one predicted probability per example.
ypred = torch.sigmoid(z)
ypred

tensor([0.9988, 0.9997, 0.9999], grad_fn=<SigmoidBackward0>)

In [98]:
# Per-example binary cross-entropy, built from its two terms, then reduced to a
# single scalar by summing over the batch so that backward() can be called on it.
positive_term = y * torch.log(ypred)
negative_term = (1 - y) * torch.log(1 - ypred)
per_example_loss = -(positive_term + negative_term)
L = per_example_loss.sum()
L

tensor(6.7016, grad_fn=<SumBackward0>)

In [99]:
# Backpropagate the summed loss; each parameter's gradient is the sum of the per-example contributions.
L.backward()
print(w.grad)
print(b.grad)

tensor(6.6882)
tensor(0.9983)


### Gradient Accumulation

In [107]:
# Fresh tracked leaf tensor for the accumulation demo.
x = torch.tensor(2.0,requires_grad=True)

In [108]:
# Each iteration rebuilds the graph and calls backward(). x.grad is never reset here,
# so every new gradient (12) is added to the running total instead of replacing it.
for i in range(5):
  y = x**3
  y.backward()
  print(f'Gradient of X at {i} is {x.grad}') # this accumulation gives wrong gradients when a training loop runs many forward/backward passes

Gradient of X at 0 is 12.0
Gradient of X at 1 is 24.0
Gradient of X at 2 is 36.0
Gradient of X at 3 is 48.0
Gradient of X at 4 is 60.0


In [111]:
# Same loop as the accumulation demo, but the stored gradient is cleared after every pass.
x = torch.tensor(2.0,requires_grad=True)

for i in range(5):
  y = x**3
  y.backward()
  print(f'Gradient of X at {i} is {x.grad}')
  x.grad.zero_() # reset x.grad in place so the next backward() starts from zero

Gradient of X at 0 is 12.0
Gradient of X at 1 is 12.0
Gradient of X at 2 is 12.0
Gradient of X at 3 is 12.0
Gradient of X at 4 is 12.0


### Stopping gradient tracking

In [118]:
# Gradient tracking is typically switched off once training is finished (e.g. for inference).

# Option 1: requires_grad_(False) flips the flag on the existing tensor in place
x.requires_grad_(False)

tensor(2.)

In [119]:
# Option 2: detach() returns a tensor with the same value that is cut off from the autograd graph

x = torch.tensor(2.0,requires_grad=True)
z = x.detach()
z

tensor(2.)

In [120]:
# Option 3: inside a torch.no_grad() block operations are not recorded, so results carry no grad_fn

x = torch.tensor(2.0,requires_grad=True)

with torch.no_grad():
  y = x**3
  print(y)

tensor(8.)
