In [1]:
from __future__ import print_function
import torch

### Create Input

In [2]:
# Draw a single standard-normal value and flag it for autograd tracking,
# so gradients can flow back to the input as well as to the weights.
a = torch.randn(1).requires_grad_(True)
a

tensor([-2.8659], requires_grad=True)

### Create Weights

In [3]:
# Four independent random weights, each shaped like the input `a` and
# tracked by autograd. They are drawn in order w1, w2, w3, w4.
w1, w2, w3, w4 = (torch.randn_like(a, requires_grad=True) for _ in range(4))

w1, w2, w3, w4

(tensor([-0.7134], requires_grad=True),
 tensor([0.4422], requires_grad=True),
 tensor([2.3746], requires_grad=True),
 tensor([0.9178], requires_grad=True))

### Forward pass

In [4]:
# Two parallel branches each scale the input by their own weight ...
b = a * w1
c = a * w2
# ... and a second pair of weights mixes the branches into the output.
d = b * w3 + c * w4
d

tensor([3.6919], grad_fn=<AddBackward0>)

### Calculate Loss

In [5]:
expected = 10

# The loss is the signed gap between the target and the prediction, reduced
# to a 0-dim tensor so that backward() can be called on it without arguments.
# NOTE(review): this is a signed difference, not a distance -- it becomes
# negative once d exceeds `expected`.
L = torch.sum(expected - d)
L

tensor(6.3081, grad_fn=<SumBackward0>)

### Check no gradients yet

In [6]:
# `a` was created directly, so it has no grad_fn; `d` was produced by an
# operation, so autograd attached the node that created it.
for label, fn in (
    ("The grad fn for a is", a.grad_fn),
    ("The grad fn for d is", d.grad_fn),
):
    print(label, fn)

The grad fn for a is None
The grad fn for d is <AddBackward0 object at 0x128c42290>


In [7]:
# Gather the .grad attribute of every weight into one tuple for display.
tuple(w.grad for w in (w1, w2, w3, w4))

(None, None, None, None)

### Back Propagation

In [8]:
# Run reverse-mode autodiff from the loss; this fills in .grad on the
# leaf tensors that require gradients.
torch.autograd.backward(L)

In [9]:
# Gather the .grad attribute of every weight into one tuple for display.
tuple(w.grad for w in (w1, w2, w3, w4))

(tensor([6.8052]), tensor([2.6302]), tensor([-2.0446]), tensor([1.2673]))

### Gradient Descent

In [10]:
learning_rate = 0.03

# Apply the step in place inside no_grad(): the update itself is not recorded
# by autograd, and w1..w4 remain the same leaf tensors, so a later
# L.backward() fills their .grad again. Rebinding each name to
# `w - learning_rate * w.grad` would instead produce non-leaf tensors whose
# .grad stays None, which breaks a second descent step.
with torch.no_grad():
    for w in (w1, w2, w3, w4):
        w -= learning_rate * w.grad
        # backward() accumulates into .grad, so clear it before the next pass.
        w.grad.zero_()

### Forward pass (again)

In [11]:
# Recompute both branches and the mixed output with the current weights.
b = a * w1
c = a * w2
d = b * w3 + c * w4
d

tensor([5.4896], grad_fn=<AddBackward0>)

### Calculate Loss (again)

In [12]:
# Signed gap between the target and the new prediction, reduced to a
# 0-dim tensor.
L = torch.sum(expected - d)
L

tensor(4.5104, grad_fn=<SumBackward0>)