In [1]:
import numpy as np
import torch

In [2]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [3]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [4]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


### Initialize Random Weights

In [5]:
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 1.5977,  0.1375, -0.1016],
        [ 0.0372, -0.2016, -0.7292]], requires_grad=True)
tensor([ 1.2855, -0.7416], requires_grad=True)


### Linear Model

In [6]:
# @ is dot product
def model(x):
    return x @ w.t() + b

In [7]:
preds = model(inputs)
print(preds)

tensor([[122.7577, -42.8903],
        [152.2690, -61.7681],
        [152.8125, -66.8153],
        [166.4002, -32.5981],
        [117.6104, -68.5744]], grad_fn=<AddBackward0>)


In [8]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


### MSE Loss

In [9]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [10]:
loss = mse(preds, targets)
print(loss)

tensor(15093.5889, grad_fn=<DivBackward0>)


### Back Propagation

In [11]:
loss.backward()

In [12]:
print(w)
print(w.grad)

tensor([[ 1.5977,  0.1375, -0.1016],
        [ 0.0372, -0.2016, -0.7292]], requires_grad=True)
tensor([[  6007.4834,   4577.4238,   3151.6914],
        [-12095.6914, -13932.4707,  -8513.2129]])


### Adjust Weights 

##### Use torch.no_grad to indicate to PyTorch that we shouldn't track, calculate, or modify gradients while updating the weights and biases

In [13]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5

In [14]:
preds = model(inputs)

In [15]:
loss = mse(preds, targets)
print(loss)

tensor(10722.5439, grad_fn=<DivBackward0>)


##### Reset the gradients to zero by invoking the .zero_() method. We need to do this because PyTorch accumulates gradients. Otherwise, the next time we invoke .backward on the loss, the new gradient values are added to the existing gradients, which may lead to unexpected results

In [16]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [17]:
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [18]:
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(544.5877, grad_fn=<DivBackward0>)


In [19]:
preds

tensor([[ 66.4827,  75.3467],
        [ 83.0661,  96.5342],
        [101.7638, 134.2639],
        [ 73.4466,  66.2502],
        [ 73.1087,  94.7371]], grad_fn=<AddBackward0>)

In [20]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])