In [54]:
import torch
import numpy as np

In [55]:
x = torch.randn(4)
print(x)
xg = torch.randn(4, requires_grad=True)
print(xg)

tensor([ 1.5220,  0.9450, -0.5174,  0.8648])
tensor([ 0.0071, -0.8699, -0.4042, -0.1628], requires_grad=True)


In [56]:
y = x + 2
print(y)
yg = xg + 2
print(yg)

tensor([3.5220, 2.9450, 1.4826, 2.8648])
tensor([2.0071, 1.1301, 1.5958, 1.8372], grad_fn=<AddBackward0>)


In [60]:
zg = yg * yg * 2
print(zg)

tensor([8.0570, 2.5544, 5.0929, 6.7505], grad_fn=<MulBackward0>)


### grad for Scalar output

In [58]:
zg = zg.mean()
print(zg)

zg.backward() # dz/dx
print(xg.grad)

tensor(5.6137, grad_fn=<MeanBackward0>)
tensor([2.0071, 1.1301, 1.5958, 1.8372])


### grad for Vector output

In [61]:
v = torch.tensor([0.1, 1.0, 0.0, 0.001], dtype=torch.float32)
zg.backward(v) # dz/dx
print(xg.grad)

tensor([2.8100, 5.6506, 1.5958, 1.8445])


##### dz/dx

Jaccobian matrix and Chain rule

J . v

v is the vector above torch.tensor

### detaching requires_grad attribute

In [62]:
print(xg)

tensor([ 0.0071, -0.8699, -0.4042, -0.1628], requires_grad=True)


#### method 1

In [63]:
with torch.no_grad():
    y = xg + 1
    print(y)

tensor([1.0071, 0.1301, 0.5958, 0.8372])


#### method 2

In [64]:
y = xg.detach()
print(y)

tensor([ 0.0071, -0.8699, -0.4042, -0.1628])


#### method 3

In [65]:
xg.requires_grad_(False)

tensor([ 0.0071, -0.8699, -0.4042, -0.1628])

### Example

In [66]:
weights = torch.ones(5, requires_grad=True)
print(weights)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


In [67]:
weights = torch.ones(5, requires_grad=True)
for epoch in range(1):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3., 3.])


#### Important:
must clear ```.grad``` before the next iteration

Example: **Incorrect Output**

In [68]:
weights = torch.ones(5, requires_grad=True)
for epoch in range(3):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3., 3.])
tensor([6., 6., 6., 6., 6.])
tensor([9., 9., 9., 9., 9.])


Example: **Correct Output**

In [69]:
weights = torch.ones(5, requires_grad=True)
for epoch in range(3):
    model_output = (weights * 3).sum()
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()

tensor([3., 3., 3., 3., 3.])
tensor([3., 3., 3., 3., 3.])
tensor([3., 3., 3., 3., 3.])


Example: **Optimizer**

In [70]:
weights = torch.ones(5, requires_grad=True)
print(type(weights))
optimizer = torch.optim.SGD(weights, lr=0.01)
optimizer.step()
optimizer.zero_grad()

<class 'torch.Tensor'>


TypeError: params argument given to the optimizer should be an iterable of Tensors or dicts, but got torch.FloatTensor

### Backpropagation

let: ```x=1```, ```y=2```, ```w=1``` ,```f=w*x```

In [72]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

In [73]:
# 1. forward pass
y_hat = w * x

# loss
loss = (y_hat - y) ** 2

In [74]:
# 3. backward pass
# PyTorch computes 2. local gradients automatically for us
loss.backward()

print(w.grad)

tensor(-2.)


In [75]:
### update weights w and couple of iterations