In [1]:
import torch

In [2]:
# Feature values 0 through 7; the float argument makes arange return a
# floating-point tensor rather than an integer one.
xs = torch.arange(8.)
xs

tensor([0., 1., 2., 3., 4., 5., 6., 7.])

In [3]:
# Observed target values, one per entry of xs.
ys = torch.tensor([
    1.86, 1.31, 0.62, 0.33,
    0.09, -0.67, -1.23, -1.37,
])
ys

tensor([ 1.8600,  1.3100,  0.6200,  0.3300,  0.0900, -0.6700, -1.2300, -1.3700])

In [4]:
def regression(my_x, my_m, my_b):
    """Evaluate the straight line ``my_m * my_x + my_b``.

    Args:
        my_x: Input value(s) at which to evaluate the line.
        my_m: Slope of the line.
        my_b: Intercept of the line.

    Returns:
        The product of ``my_m`` and ``my_x`` with ``my_b`` added to it.
    """
    scaled_input = my_m * my_x
    return scaled_input + my_b

In [5]:
# Initial slope; requires_grad=True makes autograd track operations on it.
m = torch.tensor([0.9], requires_grad=True)

In [6]:
# Initial intercept; requires_grad=True makes autograd track operations on it.
b = torch.tensor([0.1], requires_grad=True)

In [7]:
# Select one (x, y) training pair by position; index 7 is the last of the
# eight points.
i = 7
x, y = xs[i], ys[i]

In [8]:
# The selected input: integer indexing yields a 0-dimensional (scalar) tensor.
x

tensor(7.)

In [9]:
# The true target paired with x, also a 0-dimensional (scalar) tensor.
y

tensor(-1.3700)

Step 1: Forward pass. We flow the scalar tensor x through our regression model to produce y-hat, an estimate of y. Because m and b are still at their initial values and no training has happened yet, this estimate is essentially arbitrary.

In [10]:
# Forward pass: estimate y for the single point x using the current m and b.
yhat = regression(my_x=x, my_m=m, my_b=b)
yhat

tensor([6.4000], grad_fn=<AddBackward0>)

Step 2: Compare yhat with the true y to calculate cost C.

In [11]:
def squared_error(my_yhat, my_y):
    """Return the squared difference between an estimate and its target.

    Args:
        my_yhat: Estimated value(s).
        my_y: True value(s).

    Returns:
        ``(my_yhat - my_y)`` raised to the power of two.
    """
    residual = my_yhat - my_y
    return residual ** 2

In [12]:
# Cost for this single example: squared gap between the estimate and the truth.
C = squared_error(my_yhat=yhat, my_y=y)
C

tensor([60.3729], grad_fn=<PowBackward0>)

Step 3: Use autodiff to calculate gradient of C with respect to parameters.

In [13]:
# Backpropagate from C so autograd populates m.grad and b.grad.
# NOTE: backward() accumulates into .grad rather than overwriting it, so
# re-create m and b (or reset their .grad) and redo the forward pass before
# running this cell again.
C.backward()

In [14]:
# Partial derivative of C with respect to the slope m, as computed by autograd.
m.grad

tensor([108.7800])

In [15]:
# Partial derivative of C with respect to the intercept b, as computed by autograd.
b.grad

tensor([15.5400])

In [16]:
# Manual check of dC/dm via the chain rule: with C = (yhat - y)**2 and
# yhat = m*x + b, dC/dm = 2 * x * (yhat - y). It should match m.grad.
# .item() extracts yhat's single value as a plain Python float.
2 * x * (yhat.item() - y)

tensor(108.7800)

In [17]:
# Manual check of dC/db: since d(yhat)/db = 1, dC/db = 2 * (yhat - y).
# It should match b.grad.
2 * (yhat.item() - y)

tensor(15.5400)

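The gradient of cost, $\nabla C$ (pronounced "nabla C"), is the vector of all the partial derivatives of $C$ with respect to each of the individual model parameters. Here, $\nabla C = \left[ \frac{\partial C}{\partial b}, \frac{\partial C}{\partial m} \right]^T$.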

In [18]:
# Assemble the gradient of C as a 2x1 column vector, ordered [dC/db, dC/dm].
gradient = torch.tensor([
    [b.grad.item()],
    [m.grad.item()],
])
gradient

tensor([[ 15.5400],
        [108.7800]])