# PyTorch: freeCodeCamp - Full Course
### Section 2: Linear Regression

Import libraries:

In [32]:
import numpy as np
import torch

Load input data:

In [33]:
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')

Load target data:

In [34]:
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')

Convert to tensors:

In [35]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

Initialize weights and biases (randomly):

In [36]:
# Weight matrix with 2 rows and 3 columns, sampled by torch.randn.
# requires_grad=True makes autograd record operations on it so that
# loss.backward() can populate w.grad.
w = torch.randn(2, 3, requires_grad=True)
# Bias vector with 2 entries, likewise tracked by autograd.
b = torch.randn(2, requires_grad=True)
# NOTE: no random seed is set in the cells above, so the values displayed
# here change on every fresh run.
w, b

(tensor([[-1.3475, -0.0708,  0.7370],
         [ 0.4653, -0.9704,  1.0436]], requires_grad=True),
 tensor([-1.0877, -0.4828], requires_grad=True))

Model:

In [37]:
def model(x):
    """Apply the linear map defined by the notebook-level `w` and `b`.

    `x` is matrix-multiplied by the transpose of `w`, and `b` is then
    added to the product (broadcast by the usual tensor rules).
    """
    projected = x @ w.t()
    return projected + b

Run model on input data:

In [38]:
preds = model(inputs)
preds

tensor([[ -72.5049,   13.3403],
        [ -82.7688,   23.2527],
        [ -85.0597,  -29.5078],
        [-114.3032,   43.8601],
        [ -49.2693,   11.5158]], grad_fn=<AddBackward0>)

Compare with targets:

In [39]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

Calculate MSE between model outputs and targets:

In [40]:
def mse(t1, t2):
    """Return the mean squared error between two tensors.

    The element-wise difference is squared, summed over every element
    and divided by the number of elements in the difference.
    """
    residual = t1 - t2
    squared_error = residual * residual
    return squared_error.sum() / residual.numel()

loss = mse(preds, targets)
loss

tensor(17400.2246, grad_fn=<DivBackward0>)

Compute the gradient of the loss with respect to the parameters:

In [41]:
loss.backward()
w.grad, b.grad

(tensor([[-13289.3037, -14168.8750,  -8708.8887],
         [ -6413.2046,  -8487.5010,  -4821.5435]]),
 tensor([-156.9812,  -79.5078]))

Compute a single gradient update:

In [42]:
# The parameter update itself must not be recorded by autograd.
with torch.no_grad():
    # Weights: step against the gradient (learning rate 1e-5), then clear it.
    w.sub_(w.grad * 1e-5)
    w.grad.zero_()
    # Biases: same step, same reset.
    b.sub_(b.grad * 1e-5)
    b.grad.zero_()

Train the model for 1000 epochs:

In [43]:
for _ in range(1000):
    # Forward pass and loss on the full data set.
    preds = model(inputs)
    loss = mse(preds, targets)
    # Backward pass: fills w.grad and b.grad.
    loss.backward()
    # Gradient-descent step (learning rate 1e-5), outside autograd tracking.
    with torch.no_grad():
        w.sub_(w.grad * 1e-5)
        w.grad.zero_()
        b.sub_(b.grad * 1e-5)
        b.grad.zero_()

Evaluate model:

In [44]:
preds = model(inputs)
loss = mse(preds, targets)
print(w)
print(b)
print(loss)

tensor([[-0.4412,  0.7672,  0.8997],
        [-0.3461,  0.7104,  1.1215]], requires_grad=True)
tensor([-1.0798, -0.4890], requires_grad=True)
tensor(5.3296, grad_fn=<DivBackward0>)


In practice, we would not manipulate the data this manually:

In [45]:
# Input matrix: 15 rows, 3 columns.
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
        [74, 66, 43],
        [91, 87, 65],
        [88, 134, 59],
        [101, 44, 37],
        [68, 96, 71],
        [73, 66, 44],
        [92, 87, 64],
        [87, 135, 57],
        [103, 43, 36],
        [68, 97, 70],
    ],
    dtype=np.float32,
)

# Target matrix: 15 rows, 2 columns (one row per input row).
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
        [57, 69],
        [80, 102],
        [118, 132],
        [21, 38],
        [104, 118],
        [57, 69],
        [82, 100],
        [118, 134],
        [20, 38],
        [102, 120],
    ],
    dtype=np.float32,
)

Convert to tensors:

In [46]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

We can use a tensor dataset for additional useful functionality:

In [47]:
from torch.utils.data import TensorDataset

train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

Load data in batches and with shuffling using the data loader:

In [48]:
from torch.utils.data import DataLoader

# Number of samples grouped into each batch.
batch_size = 2
# shuffle=True asks the loader to reshuffle the samples on each pass,
# so the batch printed below differs between runs.
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

# Peek at the first batch only: `break` leaves the loop after one iteration.
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[ 68.,  96.,  71.],
        [ 87., 135.,  57.]])
tensor([[104., 118.],
        [118., 134.]])


Instead of initialising weights and biases manually, we can do this automatically by defining a model:

In [49]:
import torch.nn as nn

model = nn.Linear(3, 2)
print(model.weight)
print(model.bias)

print(list(model.parameters()))

Parameter containing:
tensor([[ 0.1656,  0.2584, -0.1107],
        [ 0.1520,  0.2651, -0.1415]], requires_grad=True)
Parameter containing:
tensor([0.5334, 0.4427], requires_grad=True)
[Parameter containing:
tensor([[ 0.1656,  0.2584, -0.1107],
        [ 0.1520,  0.2651, -0.1415]], requires_grad=True), Parameter containing:
tensor([0.5334, 0.4427], requires_grad=True)]


Enter inputs and obtain predictions:

In [50]:
preds = model(inputs)
preds

tensor([[25.1753, 23.2163],
        [31.2577, 28.5479],
        [43.1465, 40.9830],
        [24.4404, 22.1117],
        [29.0172, 26.4752],
        [25.0825, 23.1032],
        [30.8885, 28.1413],
        [43.2014, 40.9935],
        [24.5332, 22.2247],
        [28.7409, 26.1816],
        [24.8061, 22.8097],
        [31.1649, 28.4348],
        [43.5156, 41.3896],
        [24.7167, 22.4052],
        [29.1100, 26.5882]], grad_fn=<AddmmBackward0>)

Instead of defining the loss function manually, we can use built-in ones:

In [51]:
import torch.nn.functional as F

loss_fn = F.mse_loss
loss = loss_fn(model(inputs), targets)

loss

tensor(3929.1677, grad_fn=<MseLossBackward0>)

Initialise an optimizer:

In [52]:
from torch.optim import SGD

opt = SGD(model.parameters(), lr=1e-5)

Fit the model to the data:

In [55]:
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train `model` with mini-batch gradient descent.

    Args:
        num_epochs: number of full passes over `train_dl`.
        model: callable mapping an input batch to predictions.
        loss_fn: callable `(pred, target)` returning a scalar loss that
            supports `.backward()` and `.item()`.
        opt: optimizer exposing `zero_grad()` and `step()`.
        train_dl: iterable yielding `(xb, yb)` batches.

    Every 10th epoch the loss of that epoch's last batch is printed.
    Gradients are cleared *before* each backward pass, so gradients left on
    the parameters by code that ran before `fit` cannot leak into the first
    update. The gradients of the final batch remain on the parameters
    after the call returns.
    """
    for epoch in range(num_epochs):     # For each epoch
        loss = None                     # Stays None if no batch is seen
        for xb, yb in train_dl:         # For each batch
            opt.zero_grad()             # Drop stale gradients first
            pred = model(xb)            # Make predictions
            loss = loss_fn(pred, yb)    # Calculate loss
            loss.backward()             # Compute loss gradients
            opt.step()                  # Update model parameters
        # Only report when at least one batch produced a loss this epoch.
        if loss is not None and (epoch+1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

fit(100, model, loss_fn, opt, train_dl)
loss = loss_fn(model(inputs), targets)

loss

Epoch [10/100], Loss: 3.4481
Epoch [20/100], Loss: 30.5775
Epoch [30/100], Loss: 1.4171
Epoch [40/100], Loss: 7.7226
Epoch [50/100], Loss: 1.1432
Epoch [60/100], Loss: 9.1919
Epoch [70/100], Loss: 0.9749
Epoch [80/100], Loss: 0.7797
Epoch [90/100], Loss: 2.4805
Epoch [100/100], Loss: 3.8318


tensor(3.2207, grad_fn=<MseLossBackward0>)

Compare predictions with targets:

In [58]:
model(inputs) - targets

tensor([[ 1.4922,  0.8055],
        [ 0.5988, -1.3429],
        [ 1.5285,  2.4083],
        [-0.3947,  1.5849],
        [-2.4087, -2.5212],
        [-0.7777,  0.7006],
        [ 1.2939, -2.4552],
        [ 2.7327,  3.8944],
        [ 1.8752,  1.6898],
        [-2.4437, -0.5286],
        [ 0.1873,  1.6933],
        [-1.6711, -1.4479],
        [ 2.8334,  1.5205],
        [ 0.6403, -0.4078],
        [-0.1388, -2.4163]], grad_fn=<SubBackward0>)

Show documentation using the question mark:

In [54]:
?nn.Linear

[0;31mInit signature:[0m
[0mnn[0m[0;34m.[0m[0mLinear[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0min_features[0m[0;34m:[0m [0mint[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mout_features[0m[0;34m:[0m [0mint[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbias[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdevice[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdtype[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;32mNone[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m     
Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.

This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.

Args:
    in_features: size of each input sample
    out_features: size of each output sample
    bias: If set 