# PyTorch: freeCodeCamp - Full Course
### Section 2: Linear Regression

Import libraries:

In [None]:
import numpy as np
import torch

Load input data:

In [None]:
# Training inputs: 5 samples (rows) x 3 features (columns), stored as float32.
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')

Load target data:

In [None]:
# Training targets: 5 samples (rows) x 2 target values (columns), float32.
# Row i here is the expected output for row i of `inputs`.
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')

Convert to tensors:

In [None]:
# Turn the NumPy arrays into torch tensors. The names are rebound, so from
# here on `inputs` and `targets` refer to tensors rather than ndarrays.
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

Initialize weights and biases (randomly):

In [None]:
# Randomly initialised parameters drawn with torch.randn.
# w has shape (2, 3) and b has shape (2,): with the 3-column inputs and
# 2-column targets above, that is one weight row and one bias per target.
# requires_grad=True makes autograd track operations on them so that
# loss.backward() can populate w.grad and b.grad later.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
w, b

(tensor([[ 0.5942,  0.5742, -0.6547],
         [ 0.4428, -0.0141, -0.1628]], requires_grad=True),
 tensor([ 1.5261, -1.0787], requires_grad=True))

Model:

In [None]:
def model(x):
    """Apply the linear map ``x @ w.T + b`` using the global parameters.

    ``x`` is multiplied by the transpose of the module-level weight matrix
    ``w`` and the module-level bias ``b`` is added to the result.
    """
    weighted_sum = torch.matmul(x, w.t())
    return weighted_sum + b

Run model on input data:

In [None]:
# Forward pass over the whole input tensor with the untrained parameters;
# the bare name on the last line displays the result in the notebook.
preds = model(inputs)
preds

tensor([[55.2278, 23.3002],
        [64.2345, 27.5556],
        [92.2008, 26.1111],
        [62.6075, 37.4577],
        [51.8268, 16.7239]], grad_fn=<AddBackward0>)

Compare with targets:

In [None]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

Calculate MSE between model outputs and targets:

In [None]:
def mse(t1, t2):
    """Return the mean squared error between two tensors.

    The element-wise differences are squared, summed, and divided by the
    number of elements, giving a single scalar tensor.
    """
    residual = t1 - t2
    squared_error = residual * residual
    return squared_error.sum() / residual.numel()

# Loss of the untrained model on the full dataset; the bare name displays it.
loss = mse(preds, targets)
loss

tensor(3472.8340, grad_fn=<DivBackward0>)

Compute the gradient of the loss with respect to the parameters:

In [None]:
# Backpropagate from the scalar loss; this fills w.grad and b.grad with the
# derivatives of the loss with respect to w and b (same shapes as w and b).
loss.backward()
w.grad, b.grad

(tensor([[ -660.5095, -1656.9398,  -948.0393],
         [-5280.4443, -6742.7861, -4010.0957]]),
 tensor([-10.9805, -65.7703]))

Compute a single gradient update:

In [None]:
with torch.no_grad():            # keep the update itself out of the autograd graph
    w.sub_(w.grad * 1e-5)        # in-place step: w <- w - lr * dL/dw  (lr = 1e-5)
    w.grad.zero_()               # clear, since backward() accumulates into .grad
    b.sub_(b.grad * 1e-5)        # same step for the bias
    b.grad.zero_()

Train the model for 1000 epochs:

In [None]:
for _ in range(1000):
    # Forward pass and loss on the full dataset.
    preds = model(inputs)
    loss = mse(preds, targets)
    # Backward pass: populate w.grad and b.grad.
    loss.backward()
    # Gradient-descent step (learning rate 1e-5), one parameter at a time,
    # resetting each gradient right after it has been applied.
    with torch.no_grad():
        w.sub_(w.grad * 1e-5)
        w.grad.zero_()
        b.sub_(b.grad * 1e-5)
        b.grad.zero_()

Evaluate model:

In [None]:
# Re-run the forward pass with the trained parameters and report the
# final weights, biases and mean squared error.
preds = model(inputs)
loss = mse(preds, targets)
print(w)
print(b)
print(loss)

tensor([[-0.2969,  1.0341,  0.2030],
        [-0.2196,  0.9126,  0.6136]], requires_grad=True)
tensor([ 1.5245, -1.0790], requires_grad=True)
tensor(18.4960, grad_fn=<DivBackward0>)


In practice, we would not handle the data by hand like this:

In [None]:
# 15 samples x 3 features: the five rows used earlier, followed by two
# slightly perturbed copies of each of them.
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
        [74, 66, 43],
        [91, 87, 65],
        [88, 134, 59],
        [101, 44, 37],
        [68, 96, 71],
        [73, 66, 44],
        [92, 87, 64],
        [87, 135, 57],
        [103, 43, 36],
        [68, 97, 70],
    ],
    dtype='float32',
)

# 15 samples x 2 target values; row i is the expected output for inputs[i].
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
        [57, 69],
        [80, 102],
        [118, 132],
        [21, 38],
        [104, 118],
        [57, 69],
        [82, 100],
        [118, 134],
        [20, 38],
        [102, 120],
    ],
    dtype='float32',
)

Convert to tensors:

In [None]:
# Turn the larger NumPy arrays into torch tensors, rebinding the same names.
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

We can use a tensor dataset for additional useful functionality:

In [None]:
from torch.utils.data import TensorDataset

# Pair each input row with its target row. Slicing the dataset returns an
# (inputs, targets) tuple for the selected rows, as the slice below shows.
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

Load data in batches and with shuffling using the data loader:

In [None]:
from torch.utils.data import DataLoader

batch_size = 2
# Serve the dataset in mini-batches of `batch_size` rows, in shuffled order.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

# Peek at the first mini-batch only.
xb, yb = next(iter(train_dl))
print(xb)
print(yb)

tensor([[102.,  43.,  37.],
        [ 73.,  66.,  44.]])
tensor([[22., 37.],
        [57., 69.]])


Instead of initialising weights and biases manually, we can do this automatically by defining a model:

In [None]:
import torch.nn as nn

# nn.Linear(3, 2): 3 input features -> 2 outputs, created with its own
# initial weight (2 x 3) and bias (2,).
# NOTE: this rebinds the name `model`, replacing the hand-written model()
# function defined earlier in the notebook.
model = nn.Linear(3, 2)
print(model.weight)
print(model.bias)

# parameters() yields the same weight and bias tensors printed above.
print(list(model.parameters()))

Parameter containing:
tensor([[ 0.3161,  0.2886, -0.0400],
        [ 0.2829,  0.2267, -0.2339]], requires_grad=True)
Parameter containing:
tensor([-0.5491, -0.3985], requires_grad=True)
[Parameter containing:
tensor([[ 0.3161,  0.2886, -0.0400],
        [ 0.2829,  0.2267, -0.2339]], requires_grad=True), Parameter containing:
tensor([-0.5491, -0.3985], requires_grad=True)]


Enter inputs and obtain predictions:

In [None]:
# Forward pass of the untrained nn.Linear model over all 15 samples.
preds = model(inputs)
preds

tensor([[40.1437, 25.3876],
        [51.0548, 30.3294],
        [63.3066, 41.0295],
        [42.6233, 29.5551],
        [46.1700, 24.5150],
        [40.1712, 25.4438],
        [50.7263, 29.8688],
        [63.5827, 41.0785],
        [42.5958, 29.4989],
        [45.8140, 23.9982],
        [39.8151, 24.9270],
        [51.0823, 30.3856],
        [63.6351, 41.4901],
        [42.9793, 30.0720],
        [46.1426, 24.4588]], grad_fn=<AddmmBackward0>)

Instead of defining the loss function manually, we can use built-in ones:

In [None]:
import torch.nn.functional as F

# Built-in mean squared error, used in place of the hand-written mse().
loss_fn = F.mse_loss
# Loss of the untrained nn.Linear model on the full dataset.
loss = loss_fn(model(inputs), targets)

loss

tensor(3231.7385, grad_fn=<MseLossBackward0>)

Initialise an optimizer:

In [None]:
from torch.optim import SGD

# Stochastic gradient descent over the model's parameters; lr=1e-5 is the
# same learning rate used in the manual update loop earlier.
opt = SGD(model.parameters(), lr=1e-5)

Fit the model to the data:

In [None]:
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train ``model`` with mini-batch gradient descent.

    Args:
        num_epochs: Number of full passes over ``train_dl``.
        model: Callable mapping an input batch to predictions.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        opt: Optimizer holding the model parameters; must provide
            ``step()`` and ``zero_grad()``.
        train_dl: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        None. Every 10th epoch the loss of the most recent batch is printed
        (a single-batch value, not an average over the epoch). Nothing is
        printed if no batch has been processed yet.
    """
    # Bound up front so that a loader yielding no batches cannot leave
    # `loss` undefined when the progress line below is reached.
    loss = None
    for epoch in range(num_epochs):     # For each epoch
        for xb, yb in train_dl:         # For each batch
            pred = model(xb)            # Make predictions
            loss = loss_fn(pred, yb)    # Calculate loss
            loss.backward()             # Compute loss gradients
            opt.step()                  # Update model parameters
            opt.zero_grad()             # Reset gradients
        if loss is not None and (epoch + 1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

# Train for 100 epochs, then measure the loss over the full dataset
# (the values printed by fit() are for a single batch only).
fit(100, model, loss_fn, opt, train_dl)
loss = loss_fn(model(inputs), targets)

loss

Epoch [10/100], Loss: 20.3246
Epoch [20/100], Loss: 243.6873
Epoch [30/100], Loss: 148.9033
Epoch [40/100], Loss: 28.0113
Epoch [50/100], Loss: 12.5532
Epoch [60/100], Loss: 19.9834
Epoch [70/100], Loss: 3.4773
Epoch [80/100], Loss: 27.7119
Epoch [90/100], Loss: 34.7176
Epoch [100/100], Loss: 1.0164


tensor(13.2445, grad_fn=<MseLossBackward0>)

Compare predictions with targets:

In [None]:
# Residuals (prediction minus target) for every sample and target column.
model(inputs) - targets

tensor([[ 1.0950,  0.5649],
        [-0.7851, -3.3787],
        [ 3.0320,  5.4487],
        [ 0.0476,  2.3833],
        [-5.1232, -6.5518],
        [-1.1819,  0.4346],
        [-0.2716, -4.8015],
        [ 4.1460,  6.7715],
        [ 2.3244,  2.5136],
        [-5.3328, -4.8442],
        [-0.3915,  1.1421],
        [-3.0619, -3.5090],
        [ 4.5185,  4.8715],
        [ 1.2572,  0.6757],
        [-2.8463, -6.4214]], grad_fn=<SubBackward0>)

Show documentation using the question mark:

In [None]:
?nn.Linear

Init signature:
nn.Linear(
    in_features: int,
    out_features: int,
    bias: bool = True,
    device=None,
    dtype=None,
) -> None
Docstring:
Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.

This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.

Args:
    in_features: size of each input sample
    out_features: size of each output sample
    bias: If set 