# Linear Regression with PyTorch

In [1]:
import numpy as np
import torch

In [2]:
# Input features: one row per sample, columns are (temp, rainfall, humidity).
# dtype is set to float32 explicitly.
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [3]:
# Targets: one row per sample (same order as `inputs`), columns are the
# (apples, oranges) yields. dtype is set to float32 explicitly.
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [4]:
# Convert the NumPy arrays to torch tensors (the names are rebound in place).
# torch.as_tensor converts an ndarray the same way torch.from_numpy does, but
# it also accepts an existing tensor, so re-running this cell is harmless
# instead of raising a TypeError.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [6]:
# yield_apple  = w11 * temp + w12 * rainfall + w13 * humidity + b1
# yield_orange = w21 * temp + w22 * rainfall + w23 * humidity + b2
# Looking at the two linear regression equations above, the weights w form a 2x3 matrix and the biases b form a vector of length 2

In [8]:
# Weights and biases, randomly initialised with torch.randn.
# w has shape (2, 3): one row per target, one column per input feature.
# b has shape (2,): one bias per target.
# requires_grad=True lets loss.backward() populate w.grad and b.grad later on.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-0.2023,  1.3672,  0.0169],
        [ 1.0684,  1.3708, -0.9042]], requires_grad=True)
tensor([-0.3253, -1.7143], requires_grad=True)


In [9]:
def model(x):
    """Linear model: multiply the input batch by the transposed weights and add the bias.

    Reads the notebook-level tensors `w` (weights) and `b` (biases).
    """
    w_transposed = w.t()            # transpose of w, i.e. (m, n) -> (n, m)
    return (x @ w_transposed) + b   # @ is matrix multiplication

In [10]:
# Predictions
preds = model(inputs)
print(preds)

tensor([[ 77.2321, 129.2411],
        [102.6561, 158.2705],
        [166.2532, 222.4806],
        [ 38.4506, 132.7498],
        [118.1466, 140.3073]], grad_fn=<AddBackward0>)


In [14]:
def mse(t1, t2):
    """Return the mean squared error between tensors `t1` and `t2`.

    The element-wise difference is squared, summed, and divided by the
    total number of elements.
    """
    residual = t1 - t2
    squared_error = residual * residual
    total = torch.sum(squared_error)
    return total / residual.numel()    # .numel() gives the total element count

In [15]:
# Compute Loss
loss = mse(preds, targets)
print(loss)

tensor(2807.0913, grad_fn=<DivBackward0>)


In [16]:
rmse = torch.sqrt(loss)
rmse

tensor(52.9820, grad_fn=<SqrtBackward>)

## Adjust weights and biases using gradient descent

We'll reduce the loss and improve our model using the gradient descent optimization algorithm, which has the following steps:

1. Generate predictions

2. Calculate the loss

3. Compute gradients w.r.t the weights and biases

4. Adjust the weights by subtracting a small quantity proportional to the gradient

5. Reset the gradients to zero

Let's implement the above step by step.

In [17]:
# 1) Generate predictions
preds = model(inputs)
print(preds)

tensor([[ 77.2321, 129.2411],
        [102.6561, 158.2705],
        [166.2532, 222.4806],
        [ 38.4506, 132.7498],
        [118.1466, 140.3073]], grad_fn=<AddBackward0>)


In [18]:
# 2) Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(2807.0913, grad_fn=<DivBackward0>)


In [20]:
# 3) Compute gradients w.r.t the weights and biases
loss.backward()
print(w.grad)
print(b.grad)

tensor([[2070.9502, 2364.3325, 1341.7174],
        [5711.5420, 5432.4194, 3287.3608]])
tensor([24.3477, 64.6099])


In [22]:
# 4) Adjust the weights by subtracting a small quantity proportional to the gradient
# torch.no_grad() keeps these in-place parameter updates out of the autograd graph.
with torch.no_grad():
    w -= w.grad * 1e-5   # 1e-5 is the learning rate (step size)
    b -= b.grad * 1e-5
    # 5) Reset the gradients to zero so the next backward() call starts from a clean slate
    w.grad.zero_()
    b.grad.zero_()

In [23]:
print(w)
print(b)

tensor([[-0.2231,  1.3435,  0.0035],
        [ 1.0113,  1.3165, -0.9371]], requires_grad=True)
tensor([-0.3256, -1.7149], requires_grad=True)


In [24]:
preds = model(inputs)
print(preds)
loss = mse(preds, targets)
print(loss)   # we can see that the loss has reduced by some amount  

tensor([[ 73.5590, 120.0178],
        [ 97.8320, 146.1879],
        [160.5048, 208.3248],
        [ 34.8249, 123.3711],
        [113.5084, 128.8493]], grad_fn=<AddBackward0>)
tensor(2036.3707, grad_fn=<DivBackward0>)


In [26]:
# Repeat the gradient-descent steps (predict, loss, backward, update, reset) for 100 epochs
for i in range(100):   # the number of epochs (100) is a hyperparameter
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5   # 1e-5 is the learning rate, another hyperparameter that can be tuned
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [27]:
# verify loss now
preds = model(inputs)
loss = mse(preds, targets)
print(loss)    # loss is much lower now and thats great

tensor(263.9421, grad_fn=<DivBackward0>)


In [30]:
print(preds)
print(targets)

tensor([[ 57.1495,  74.8783],
        [ 76.5800,  89.9573],
        [131.5145, 149.9718],
        [ 20.7423,  63.8535],
        [ 92.2651,  84.5001]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


## Linear regression using PyTorch built-ins

The model and training process above were implemented using basic matrix operations. But since this is such a common pattern, PyTorch has several built-in functions and classes to make it easy to create and train models.


In [2]:
import torch.nn as nn

In [105]:
# The training set is the same five samples repeated three times (15 rows),
# so each array is built from the five base rows with np.tile.

# Input (temp, rainfall, humidity)
inputs = np.tile(
    np.array([[73, 67, 43],
              [91, 88, 64],
              [87, 134, 58],
              [102, 43, 37],
              [69, 96, 70]], dtype='float32'),
    (3, 1),   # repeat 3x along the rows, 1x along the columns
)

# Targets (apples, oranges)
targets = np.tile(
    np.array([[56, 70],
              [81, 101],
              [119, 133],
              [22, 37],
              [103, 119]], dtype='float32'),
    (3, 1),
)
print(inputs)
print(targets)

[[ 73.  67.  43.]
 [ 91.  88.  64.]
 [ 87. 134.  58.]
 [102.  43.  37.]
 [ 69.  96.  70.]
 [ 73.  67.  43.]
 [ 91.  88.  64.]
 [ 87. 134.  58.]
 [102.  43.  37.]
 [ 69.  96.  70.]
 [ 73.  67.  43.]
 [ 91.  88.  64.]
 [ 87. 134.  58.]
 [102.  43.  37.]
 [ 69.  96.  70.]]
[[ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]
 [ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]
 [ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]]


In [106]:
# Convert the NumPy arrays into tensors (the names are rebound in place).
# torch.as_tensor converts an ndarray the same way torch.from_numpy does, but
# it also accepts an existing tensor, so re-running this cell is harmless
# instead of raising a TypeError.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)

In [107]:
from torch.utils.data import TensorDataset

In [108]:
# Define dataset
train_ds = TensorDataset(inputs,targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [109]:
train_ds[[1,5,3,6,2]]

(tensor([[ 91.,  88.,  64.],
         [ 73.,  67.,  43.],
         [102.,  43.,  37.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 81., 101.],
         [ 56.,  70.],
         [ 22.,  37.],
         [ 81., 101.],
         [119., 133.]]))

In [110]:
from torch.utils.data import DataLoader

In [111]:
# Define Data Loader
batch_size = 5    # divide or split the whole data into batches of 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [112]:
for xb, yb in train_dl:
    print("Batch:")
    print(xb)
    print(yb)

Batch:
tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [ 87., 134.,  58.],
        [ 91.,  88.,  64.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [119., 133.],
        [ 81., 101.]])
Batch:
tensor([[ 69.,  96.,  70.],
        [ 69.,  96.,  70.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[103., 119.],
        [103., 119.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])
Batch:
tensor([[ 73.,  67.,  43.],
        [102.,  43.,  37.],
        [102.,  43.,  37.],
        [ 73.,  67.,  43.],
        [ 91.,  88.,  64.]])
tensor([[ 56.,  70.],
        [ 22.,  37.],
        [ 22.,  37.],
        [ 56.,  70.],
        [ 81., 101.]])


# nn.Linear

In [113]:
# Define model
# NOTE: this rebinds the name `model`; from here on it refers to the nn.Linear
# module rather than the hand-written model(x) function defined earlier.
model = nn.Linear(3,2) # 3 input features, 2 target outputs
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.1260,  0.2871,  0.0093],
        [ 0.3002,  0.1276,  0.4678]], requires_grad=True)
Parameter containing:
tensor([-0.1422,  0.4869], requires_grad=True)


In [114]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.1260,  0.2871,  0.0093],
         [ 0.3002,  0.1276,  0.4678]], requires_grad=True),
 Parameter containing:
 tensor([-0.1422,  0.4869], requires_grad=True)]

In [115]:
# Generate Predictions
preds = model(inputs)
preds

tensor([[10.2965, 51.0612],
        [14.2535, 68.9671],
        [27.9094, 70.8268],
        [-0.3049, 53.8978],
        [19.3788, 66.1908],
        [10.2965, 51.0612],
        [14.2535, 68.9671],
        [27.9094, 70.8268],
        [-0.3049, 53.8978],
        [19.3788, 66.1908],
        [10.2965, 51.0612],
        [14.2535, 68.9671],
        [27.9094, 70.8268],
        [-0.3049, 53.8978],
        [19.3788, 66.1908]], grad_fn=<AddmmBackward>)

In [116]:
# built-in loss function
import torch.nn.functional as F

In [117]:
# Define the loss function (mean squared error from torch.nn.functional)
loss_fn = F.mse_loss

In [118]:
loss = loss_fn(model(inputs),targets)
loss

tensor(3065.6050, grad_fn=<MseLossBackward>)

In [119]:
# Optimizer (stochastic gradient descent)

opt = torch.optim.SGD(model.parameters(), lr = 1e-5)  # we need to pass weights and bias matrices ie model.parameters()

## Train the model

We are now ready to train the model. We'll follow the exact same process to implement gradient descent:

1. Generate predictions

2. Calculate the loss

3. Compute gradients w.r.t the weights and biases

4. Adjust the weights by subtracting a small quantity proportional to the gradient

5. Reset the gradients to zero

The only change is that we'll work with batches of data, instead of processing the entire training data in every iteration. Let's define a utility function `fit` which trains the model for a given number of epochs.

In [124]:
# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train `model` with mini-batch gradient descent, printing progress.

    Args:
        num_epochs: number of full passes over `train_dl`.
        model: callable mapping an input batch to predictions.
        loss_fn: callable `(preds, targets)` returning a scalar loss tensor.
        opt: optimizer exposing `step()` and `zero_grad()`.
        train_dl: iterable yielding `(xb, yb)` batches; iterated once per epoch.

    Returns:
        None. Progress is printed every 10th epoch, followed by a final
        summary of the lowest loss seen.

    Raises:
        ValueError: if `train_dl` yields no batches during an epoch.

    Note:
        The loss that is printed and tracked as "Min Loss" is the loss of the
        last batch of each epoch, not an average over the whole epoch.
    """
    # Start from +inf rather than a finite sentinel, so a run whose losses
    # are all very large still reports its real minimum.
    min_loss = float('inf')
    at_epoch = 0

    # Discard gradients left over from backward() calls made before fit(),
    # otherwise they would be added into the first update.
    opt.zero_grad()

    for epoch in range(num_epochs):
        loss = None
        for xb, yb in train_dl:
            # Generate predictions
            preds = model(xb)

            # Calculate the loss
            loss = loss_fn(preds, yb)

            # Compute gradients w.r.t the weights and biases
            loss.backward()

            # Adjust the weights by subtracting a small quantity proportional to the gradient
            opt.step()

            # Reset the gradients to zero
            opt.zero_grad()

        if loss is None:
            raise ValueError('train_dl yielded no batches; nothing to train on')

        # Loss of the last batch processed in this epoch
        last_batch_loss = loss.item()

        if (epoch+1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, last_batch_loss))
        if min_loss > last_batch_loss:
            min_loss = last_batch_loss
            at_epoch = epoch+1
    print('Min Loss at Epoch [{}/{}], Loss: {:.4f}'.format(at_epoch, num_epochs, min_loss))

In [125]:
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 14.1527
Epoch [20/100], Loss: 2.9620
Epoch [30/100], Loss: 10.1879
Epoch [40/100], Loss: 8.8275
Epoch [50/100], Loss: 8.1289
Epoch [60/100], Loss: 6.4297
Epoch [70/100], Loss: 3.0755
Epoch [80/100], Loss: 9.2591
Epoch [90/100], Loss: 2.4511
Epoch [100/100], Loss: 6.5072
Min Loss at Epoch [93/100], Loss: 1.3945


In [126]:
# Generate predictions
preds = model(inputs)
preds

tensor([[ 57.4294,  70.6527],
        [ 80.4155,  99.5699],
        [122.1076, 134.7381],
        [ 22.7425,  38.5444],
        [ 97.8237, 116.4392],
        [ 57.4294,  70.6527],
        [ 80.4155,  99.5699],
        [122.1076, 134.7381],
        [ 22.7425,  38.5444],
        [ 97.8237, 116.4392],
        [ 57.4294,  70.6527],
        [ 80.4155,  99.5699],
        [122.1076, 134.7381],
        [ 22.7425,  38.5444],
        [ 97.8237, 116.4392]], grad_fn=<AddmmBackward>)

In [127]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])