### Autograd

In [8]:
import torch

# Fix the RNG seed so that Restart & Run All draws the same x on every run
# (re-run the notebook to refresh any outputs recorded before seeding).
torch.manual_seed(0)

# requires_grad=True asks autograd to track the operations applied to x.
x = torch.randn(2, requires_grad=True)
print(x)

tensor([-1.2882, -0.1838], requires_grad=True)


In [9]:
# Adding to x builds the computational graph: every operation becomes
# a node with inputs and outputs.
# The forward pass produces y; the grad_fn attached to y is what the
# backward pass uses for dy/dx.
y = x + 2
print(y)

tensor([0.7118, 1.8162], grad_fn=<AddBackward0>)


In [10]:
# z = (y * y) * 2, computed element-wise in two explicit steps.
squared = y * y
z = squared * 2
print(z)

tensor([1.0132, 6.5972], grad_fn=<MulBackward0>)


In [11]:
# Average the elements of z so that z becomes a single scalar value.
z = torch.mean(z)
print(z)

tensor(3.8052, grad_fn=<MeanBackward0>)


In [12]:
# Backward pass from z: computes the gradients of z with respect to x
# and makes them available as x.grad.
z.backward()
print(x.grad)

tensor([1.4235, 3.6324])


In [19]:
# let's start with forward propagation
import torch

# Four single-element leaf tensors, all tracked by autograd.
a, b, c, d = (
    torch.tensor([value], requires_grad=True)
    for value in (2., -4., -2., 2.)
)

# First layer of operations: one sum and one product.
e = a + b
f = c * d

# Second layer: combine the two intermediate results.
g = e * f

# Show the result of every step (each one carries its grad_fn).
print(e, f, g)

tensor([-2.], grad_fn=<AddBackward0>) tensor([-4.], grad_fn=<MulBackward0>) tensor([8.], grad_fn=<MulBackward0>)


In [17]:
# during these operations, PyTorch records the computation as a computational graph
# this graph enables automatic differentiation, i.e. calculating the gradients needed for backpropagation

In [20]:
# Backward pass from g: computes the gradients of g with respect to the
# leaf tensors a, b, c and d and stores them in their .grad attributes.
g.backward()

# Read .grad directly instead of going through the legacy .data attribute:
# .grad is already a plain tensor, and changes made through .data are not
# tracked by autograd.
print(a.grad)
print(b.grad)
print(c.grad)
print(d.grad)

tensor([-4.])
tensor([-4.])
tensor([-4.])
tensor([4.])


In [21]:
# ways to stop tracking gradients / disable gradient calculation:
# a.requires_grad_(False)
# a.detach()
# with torch.no_grad():
    

### Gradient Accumulation

In [24]:
import torch
# Leaf tensor whose .grad is inspected after each backward pass.
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    # .grad is never reset inside this loop, so every backward() call
    # adds the new gradient on top of the previous one.
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
        

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])


In [26]:
# to prevent gradient accumulation, zero the gradients after each backward pass
import torch
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    # Reset the stored gradient in place so the next pass starts from zero.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


### Backpropagation

1. Forward Pass
2. Compute Local Gradients
3. Backward Pass --> compute d Loss / d Weights using the chain rule

![Backpropagation example with linear regression algorithm](./images/01-backpropagation-tutorial-with-linear-regression.png)

The goal is to minimize the loss; backpropagation computes d Loss / d Weights, which tells us how to adjust the weights to reduce it.
![Backpropagation process](./images/02-backpropagation-process.png)

#### Let's see an example and code for this

![Backpropagation with numerical example](./images/03-backpropagation-with-numerical-example.png)

In [31]:
import torch
# One training example (x, y) and a single trainable weight w.
x = torch.tensor(1.0)
y = torch.tensor(2.0)
w = torch.tensor(1.0, requires_grad=True)

# Forward pass: prediction first, then the squared-error loss.
y_hat = w * x
residual = y_hat - y
loss = residual ** 2
print(loss)

# Backward pass: fills w.grad with dloss/dw.
loss.backward()
print(w.grad)

## update our weights
## next forward and backward pass

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)
