Follows YouTube PyTorch at <https://www.youtube.com/watch?v=GIsg-ZUy0MY>. 

We begin by importing Numpy and PyTorch

In [1]:
import torch
import numpy as np

# Training Data

In [3]:
# Inputs: Temp, rainfall, humidity
inputs = np.array([[73, 67, 43],
                    [91, 88, 64],
                    [87, 134, 58],
                    [102, 43, 37],
                    [69, 96, 70]], dtype='float32')

In [4]:
# Targets: apples, oranges
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')

In [5]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
inputs, targets

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.],
         [ 69.,  96.,  70.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))

# Linear regression model from scratch

The weights and biases can be represented as matrices. We will initialize them with random variables.

In [19]:
# Weights and biases

w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
w, b

(tensor([[ 0.2496, -0.5734, -0.0785],
         [ 0.0747,  0.4011,  0.1388]], requires_grad=True),
 tensor([ 1.2700, -0.6162], requires_grad=True))

We can define our model as follows:

In [20]:
def model(x):
    return x @ w.t() + b

`@` represents matrix multiplication in pytorch and `.t()` returns the transpose of the tensor.

In [21]:
# Generate predictions
preds = model(inputs)
preds

tensor([[-22.3089,  37.6811],
        [-31.5079,  50.3642],
        [-58.4137,  67.6819],
        [ -0.8377,  29.3896],
        [-42.0570,  52.7616]], grad_fn=<AddBackward0>)

Of course the predictions are far off from the actual correct values. Our weights and biases were randomly initialized.

In [22]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

# Loss function 

 We use **mean squared error** to calculate how well the model is performing.

In [23]:
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [24]:
loss = mse(preds, targets)
loss

tensor(8414.9443, grad_fn=<DivBackward0>)

In [25]:
# Compute gradients
loss.backward()

In [26]:
w, w.grad

(tensor([[ 0.2496, -0.5734, -0.0785],
         [ 0.0747,  0.4011,  0.1388]], requires_grad=True),
 tensor([[ -8745.6299, -10765.6660,  -6371.3545],
         [ -3599.3047,  -4412.0156,  -2667.4255]]))

In [27]:
b, b.grad

(tensor([ 1.2700, -0.6162], requires_grad=True),
 tensor([-107.2251,  -44.4243]))

In [28]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [29]:
preds = model(inputs)
loss = mse(preds, targets)
loss

tensor(5932.8032, grad_fn=<DivBackward0>)

Loss has reduced after on epoch of gradient descent. We will now train multiple epochs.

# Train multiple epochs

In [33]:
# Train for 100 epochs
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [34]:
preds = model(inputs)
loss = mse(preds, targets)
loss

tensor(80.2015, grad_fn=<DivBackward0>)

In [35]:
preds, targets

(tensor([[ 60.8550,  71.9517],
         [ 82.7685,  98.1023],
         [111.4878, 136.1558],
         [ 41.0978,  46.7907],
         [ 91.4672, 108.8447]], grad_fn=<AddBackward0>),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))