# Linear Regression using PyTorch

In [1]:
import numpy as np
import torch

### Dummy Training Data

In [2]:
inputs = np.array([
    [73, 67, 43],
    [91, 88, 64],
    [87, 134, 58],
    [102, 43, 37],
    [69, 96, 70]
], dtype='float32')

In [3]:
targets = np.array([
    [56, 70],
    [81, 101],
    [119, 133],
    [22, 37],
    [103, 119]
], dtype='float32')

In [4]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


### Initiate random weights and biases

In [5]:
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.3597, -1.1119,  0.0980],
        [ 0.1386, -1.4902, -1.2923]], requires_grad=True)
tensor([0.1195, 0.6990], requires_grad=True)


### The Model
According to Linear Regression: Target = Inputs * Weights (transposed) + Biases

In [6]:
def model(x):
    return x @ w.t() + b

- `@` is used to represent Matrix Multiplication.
- `*` is used for normal element wise multiplication.
- `t()` function is used to get the transpose of a matrix in PyTorch.

In [7]:
preds = model(inputs)
print(preds)

tensor([[ -43.9046, -144.5905],
        [ -58.7211, -200.5264],
        [-111.8953, -261.8745],
        [  -7.3766,  -97.0537],
        [ -74.9410, -223.2505]], grad_fn=<AddBackward0>)


In [8]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


### The Loss Function

This is used to find out how far away from target our predictions is. We will use MSE (Mean Squared Error). I.e, the following steps to create the loss function:
- Calculate the difference between `preds` and `targets`
- Square each element of the difference matrix to remove negative, and
- Take average of the squared difference matrix

In [9]:
def mse(target, pred):
    diff = target - pred
    return torch.sum(diff * diff) / diff.numel()

- `troch.sum()` returns sum of all elements in the tensor.
- `troch.numel()` returns number of elements in a tensor.

In [10]:
loss = mse(targets, preds)
print(loss)

tensor(54334.0391, grad_fn=<DivBackward0>)


### Compute the Gradients

In [11]:
loss.backward()

In [12]:
w.grad, b.grad

(tensor([[-11073.9756, -13654.9102,  -8034.5547],
         [-22949.3711, -26489.0879, -16069.0664]]),
 tensor([-135.5677, -277.4591]))

### Gradient Descent

If gradient is **positive**:
- **Increasing** the **element** slightly will **increase** the **loss**
- **Decreasing** the **element** slightly will **decrease** the **loss**

If gradient is **negative**:
- **Increasing** the **element** slightly will **decrease** the **loss**
- **Decreasing** the **element** slightly will **increase** the **loss**

In [13]:
w.grad.zero_()
b.grad.zero_()
print(w.grad, '\n', b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]]) 
 tensor([0., 0.])


### Adjustment to weights and biases

The following procedure is used itteratively:
- Generate predictions
- Calculate the loss
- Compute gradients w.r.t weights and biases
- Adjust weights and biases with quantity proportional to gradients
- Reset the gradients to zero

In [14]:
preds = model(inputs)
preds

tensor([[ -43.9046, -144.5905],
        [ -58.7211, -200.5264],
        [-111.8953, -261.8745],
        [  -7.3766,  -97.0537],
        [ -74.9410, -223.2505]], grad_fn=<AddBackward0>)

In [15]:
loss = mse(targets, preds)
loss

tensor(54334.0391, grad_fn=<DivBackward0>)

In [16]:
loss.backward()
print(w.grad, '\n', b.grad)

tensor([[-11073.9756, -13654.9102,  -8034.5547],
        [-22949.3711, -26489.0879, -16069.0664]]) 
 tensor([-135.5677, -277.4591])


In [17]:
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

- `torch.no_grad()` makes sure the the expressions inside are not tracked for gradient calculation further
- here `1e-5` is the **learning rate**

In [18]:
print(w, '\n', b)

tensor([[ 0.4704, -0.9753,  0.1784],
        [ 0.3681, -1.2253, -1.1316]], requires_grad=True) 
 tensor([0.1209, 0.7017], requires_grad=True)


In [19]:
preds = model(inputs)
loss = mse(targets, preds)
w.grad.zero_()
b.grad.zero_()
print(loss)

tensor(37394.2539, grad_fn=<DivBackward0>)


### Training for multiple epochs

In [20]:
for i in range(100):
    preds = model(inputs)
    loss = mse(targets, preds)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

Let's create more predictions and check error

In [21]:
preds = model(inputs)
loss = mse(targets, preds)
print(loss)

tensor(730.2789, grad_fn=<DivBackward0>)


In [22]:
preds

tensor([[ 65.0587,  79.6578],
        [ 88.2913,  99.0813],
        [ 92.1656, 121.5447],
        [ 66.9162,  89.8647],
        [ 85.7821,  85.6728]], grad_fn=<AddBackward0>)

In [23]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [24]:
for i in range(100):
    preds = model(inputs)
    loss = mse(targets, preds)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [25]:
preds = model(inputs)
loss = mse(targets, preds)
print(loss)
print(preds)
print(targets)

tensor(239.8286, grad_fn=<DivBackward0>)
tensor([[ 61.3553,  75.6765],
        [ 85.4275,  96.7746],
        [104.6325, 133.1916],
        [ 45.3583,  66.7654],
        [ 93.3798,  95.1394]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Linear Regression using Built-in Function in PyTorch

In [26]:
import torch.nn as nn

In [27]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], 
                   [102, 43, 37], [69, 96, 70], [73, 67, 43], 
                   [91, 88, 64], [87, 134, 58], [102, 43, 37], 
                   [69, 96, 70], [73, 67, 43], [91, 88, 64], 
                   [87, 134, 58], [102, 43, 37], [69, 96, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133], 
                    [22, 37], [103, 119], [56, 70], 
                    [81, 101], [119, 133], [22, 37], 
                    [103, 119], [56, 70], [81, 101], 
                    [119, 133], [22, 37], [103, 119]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [28]:
from torch.utils.data import TensorDataset

In [29]:
train_ds = TensorDataset(inputs, targets)

In [30]:
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [31]:
from torch.utils.data import DataLoader

In [32]:
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [33]:
for xb, yb in train_dl:
    print('batch:\n', xb, '\n', yb)

batch:
 tensor([[ 73.,  67.,  43.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 87., 134.,  58.],
        [ 91.,  88.,  64.]]) 
 tensor([[ 56.,  70.],
        [ 22.,  37.],
        [103., 119.],
        [119., 133.],
        [ 81., 101.]])
batch:
 tensor([[102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 91.,  88.,  64.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.]]) 
 tensor([[ 22.,  37.],
        [103., 119.],
        [ 81., 101.],
        [103., 119.],
        [ 56.,  70.]])
batch:
 tensor([[102.,  43.,  37.],
        [ 87., 134.,  58.],
        [ 87., 134.,  58.],
        [ 91.,  88.,  64.],
        [ 73.,  67.,  43.]]) 
 tensor([[ 22.,  37.],
        [119., 133.],
        [119., 133.],
        [ 81., 101.],
        [ 56.,  70.]])


In [34]:
model = nn.Linear(3, 2)
print(model.weight, '\n', model.bias)

Parameter containing:
tensor([[-0.4691, -0.3462, -0.2456],
        [-0.4136,  0.1522, -0.5630]], requires_grad=True) 
 Parameter containing:
tensor([-0.0370, -0.1430], requires_grad=True)


In [35]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.4691, -0.3462, -0.2456],
         [-0.4136,  0.1522, -0.5630]], requires_grad=True),
 Parameter containing:
 tensor([-0.0370, -0.1430], requires_grad=True)]

In [36]:
preds = model(inputs)
preds

tensor([[ -68.0397,  -44.3468],
        [ -88.9125,  -60.4183],
        [-101.4887,  -48.3837],
        [ -71.8597,  -56.6166],
        [ -82.8364,  -53.4792],
        [ -68.0397,  -44.3468],
        [ -88.9125,  -60.4183],
        [-101.4887,  -48.3837],
        [ -71.8597,  -56.6166],
        [ -82.8364,  -53.4792],
        [ -68.0397,  -44.3468],
        [ -88.9125,  -60.4183],
        [-101.4887,  -48.3837],
        [ -71.8597,  -56.6166],
        [ -82.8364,  -53.4792]], grad_fn=<AddmmBackward>)

In [37]:
import torch.nn.functional as F

In [38]:
loss_fn = F.mse_loss

In [39]:
loss = loss_fn(model(inputs), targets)
loss

tensor(24676.0430, grad_fn=<MseLossBackward>)

In [40]:
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

In [41]:
def fit(num_epochs, model, loss_fn, opt):
    for i in range(num_epochs):
        for xb, yb in train_dl:
            pred = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        if (i + 1) % 10 == 0:
            print('Epoch [{} / {}], Loss: {:.4f}'.format(i + 1, num_epochs, loss.item()))

In [42]:
fit(200, model, loss_fn, opt)

Epoch [10 / 200], Loss: 389.6576
Epoch [20 / 200], Loss: 30.2048
Epoch [30 / 200], Loss: 92.4345
Epoch [40 / 200], Loss: 153.5367
Epoch [50 / 200], Loss: 55.3018
Epoch [60 / 200], Loss: 71.0628
Epoch [70 / 200], Loss: 48.1772
Epoch [80 / 200], Loss: 49.7548
Epoch [90 / 200], Loss: 58.3755
Epoch [100 / 200], Loss: 45.4874
Epoch [110 / 200], Loss: 20.7166
Epoch [120 / 200], Loss: 43.9534
Epoch [130 / 200], Loss: 41.5476
Epoch [140 / 200], Loss: 45.3659
Epoch [150 / 200], Loss: 28.1625
Epoch [160 / 200], Loss: 26.9021
Epoch [170 / 200], Loss: 15.2140
Epoch [180 / 200], Loss: 26.8628
Epoch [190 / 200], Loss: 29.4764
Epoch [200 / 200], Loss: 28.3240


In [43]:
 preds = model(inputs)
 preds

tensor([[ 57.4842,  70.8464],
        [ 80.7485,  96.5263],
        [121.5020, 141.3860],
        [ 22.7942,  39.9214],
        [ 98.3772, 110.2158],
        [ 57.4842,  70.8464],
        [ 80.7485,  96.5263],
        [121.5020, 141.3860],
        [ 22.7942,  39.9214],
        [ 98.3772, 110.2158],
        [ 57.4842,  70.8464],
        [ 80.7485,  96.5263],
        [121.5020, 141.3860],
        [ 22.7942,  39.9214],
        [ 98.3772, 110.2158]], grad_fn=<AddmmBackward>)

In [44]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])