In [1]:
import numpy as np
import torch

In [2]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [3]:
inputs.shape

(5, 3)

In [4]:
targets.shape

(5, 2)

In [5]:
# Convert inputs and targets to tensors.
# torch.as_tensor wraps a NumPy array the same way torch.from_numpy does, but
# it also accepts an existing tensor, so re-running this cell does not raise.
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [6]:
# Weights and biases
# w has one row per target (apples, oranges) and one column per input feature
# (temp, rainfall, humidity); b has one entry per target.
# requires_grad=True lets loss.backward() populate w.grad and b.grad later on.
# NOTE(review): no torch.manual_seed() call is visible before this cell, so the
# random starting values (and the numbers printed below) will differ per run.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-2.7338,  1.1521,  0.4068],
        [ 0.8389, -0.6128, -2.3575]], requires_grad=True)
tensor([ 1.0640, -0.5233], requires_grad=True)


In [7]:
def model(x):
    """Linear model: multiply `x` by the transposed weights and add the bias.

    Reads the notebook-level tensors `w` (weights) and `b` (biases).
    """
    weighted = torch.matmul(x, w.t())
    return weighted + b

In [8]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[-103.8177,  -81.7122],
        [-120.2882, -128.9881],
        [ -58.7990, -146.3894],
        [-213.1877,  -28.5298],
        [ -48.4877, -166.4925]], grad_fn=<AddBackward0>)


In [9]:
# Compare with targets
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [10]:
# MSE loss
def mse(t1, t2):
    """Mean squared error: sum of element-wise squared differences between
    `t1` and `t2`, divided by the number of elements."""
    residual = t1 - t2
    squared = residual * residual
    return squared.sum() / residual.numel()

In [11]:
# Compute loss
loss = mse(preds, targets)
print(loss)

tensor(41570.2578, grad_fn=<DivBackward0>)


In [12]:
# Compute gradients
loss.backward()

In [13]:
# Gradients for weights
print(w)
print(w.grad)

tensor([[-2.7338,  1.1521,  0.4068],
        [ 0.8389, -0.6128, -2.3575]], requires_grad=True)
tensor([[-15978.8477, -15380.4238,  -9874.6074],
        [-16538.7598, -19613.3809, -11971.3047]])


In [14]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [15]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[-103.8177,  -81.7122],
        [-120.2882, -128.9881],
        [ -58.7990, -146.3894],
        [-213.1877,  -28.5298],
        [ -48.4877, -166.4925]], grad_fn=<AddBackward0>)


In [16]:
# Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(41570.2578, grad_fn=<DivBackward0>)


In [17]:
# Compute gradients
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-15978.8477, -15380.4238,  -9874.6074],
        [-16538.7598, -19613.3809, -11971.3047]])
tensor([-185.1161, -202.4224])


In [18]:
# Take one gradient-descent step, then clear the gradients
with torch.no_grad():
    for param in (w, b):
        # Move against the gradient by a small amount (learning rate 1e-5)
        param -= param.grad * 1e-5
        # Reset so the next backward() call starts from zero
        param.grad.zero_()

In [19]:
print(w)
print(b)

tensor([[-2.5740,  1.3059,  0.5056],
        [ 1.0043, -0.4167, -2.2378]], requires_grad=True)
tensor([ 1.0659, -0.5212], requires_grad=True)


In [20]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(28903.1680, grad_fn=<DivBackward0>)


In [21]:
# Train for 2000 epochs; every epoch uses the whole of `inputs` in one step
num_epochs = 2000
learning_rate = 1e-5
for epoch in range(num_epochs):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        # Update in place without recording the step in the autograd graph
        w -= w.grad * learning_rate
        b -= b.grad * learning_rate
        # backward() accumulates into .grad, so clear it before the next epoch
        w.grad.zero_()
        b.grad.zero_()

In [22]:
w

tensor([[-0.4136,  0.8400,  0.7013],
        [-0.2072,  0.9442,  0.5335]], requires_grad=True)

In [23]:
b

tensor([ 1.0834, -0.5130], requires_grad=True)

In [24]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(8.7398, grad_fn=<DivBackward0>)


In [25]:
# Predictions
preds

tensor([[ 57.3277,  70.5637],
        [ 82.2501,  97.8653],
        [118.3369, 138.9266],
        [ 20.9669,  38.6943],
        [102.2759, 113.1774]], grad_fn=<AddBackward0>)

In [26]:
# Targets
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [27]:
# Train for a further 4500 epochs, continuing from the current w and b
num_epochs = 4500
learning_rate = 1e-5
for epoch in range(num_epochs):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        # Update in place without recording the step in the autograd graph
        w -= w.grad * learning_rate
        b -= b.grad * learning_rate
        # backward() accumulates into .grad, so clear it before the next epoch
        w.grad.zero_()
        b.grad.zero_()

In [28]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(0.5272, grad_fn=<DivBackward0>)


In [29]:
# Predictions
preds

tensor([[ 57.3460,  70.2284],
        [ 82.0959, 100.6814],
        [118.6582, 133.0611],
        [ 21.0572,  37.0509],
        [101.9549, 119.0356]], grad_fn=<AddBackward0>)

In [30]:
# Targets
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [31]:
!pip install jovian --upgrade -q

## Linear regression using PyTorch built-ins

The model and training process above were implemented using basic matrix operations. But since this is such a common pattern, PyTorch has several built-in functions and classes to make it easy to create and train models.

Let's begin by importing the `torch.nn` package from PyTorch, which contains utility classes for building neural networks.

In [32]:
import torch.nn as nn

In [33]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], [91, 88, 64], [87, 134, 58], 
                   [102, 43, 37], [69, 96, 70], [73, 67, 43], 
                   [91, 88, 64], [87, 134, 58], [102, 43, 37], 
                   [69, 96, 70], [73, 67, 43], [91, 88, 64], 
                   [87, 134, 58], [102, 43, 37], [69, 96, 70]], 
                  dtype='float32')

# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133], 
                    [22, 37], [103, 119], [56, 70], 
                    [81, 101], [119, 133], [22, 37], 
                    [103, 119], [56, 70], [81, 101], 
                    [119, 133], [22, 37], [103, 119]], 
                   dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [34]:
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

## Dataset and DataLoader

We'll create a `TensorDataset`, which allows access to rows from `inputs` and `targets` as tuples, and provides standard APIs for working with many different types of datasets in PyTorch.

In [35]:
from torch.utils.data import TensorDataset

In [36]:
# Define dataset
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [37]:
from torch.utils.data import DataLoader

In [38]:
# Define data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

In [39]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[102.,  43.,  37.],
        [ 87., 134.,  58.],
        [ 69.,  96.,  70.],
        [ 87., 134.,  58.],
        [ 91.,  88.,  64.]])
tensor([[ 22.,  37.],
        [119., 133.],
        [103., 119.],
        [119., 133.],
        [ 81., 101.]])


## nn.Linear

Instead of initializing the weights & biases manually, we can define the model using the `nn.Linear` class from PyTorch, which does it automatically.

In [40]:
# Define model
model = nn.Linear(3, 2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.4507,  0.2257, -0.1988],
        [-0.3660, -0.4758,  0.3986]], requires_grad=True)
Parameter containing:
tensor([-0.3918,  0.0508], requires_grad=True)


In [41]:
# Parameters
list(model.parameters())

[Parameter containing:
 tensor([[-0.4507,  0.2257, -0.1988],
         [-0.3660, -0.4758,  0.3986]], requires_grad=True),
 Parameter containing:
 tensor([-0.3918,  0.0508], requires_grad=True)]

We can use the model to generate predictions in the exact same way as before:

In [42]:
# Generate predictions
preds = model(inputs)
preds

tensor([[-26.7165, -41.4072],
        [-34.2634, -49.6169],
        [-20.8838, -72.4300],
        [-44.0112, -42.9943],
        [-23.7353, -42.9794],
        [-26.7165, -41.4072],
        [-34.2634, -49.6169],
        [-20.8838, -72.4300],
        [-44.0112, -42.9943],
        [-23.7353, -42.9794],
        [-26.7165, -41.4072],
        [-34.2634, -49.6169],
        [-20.8838, -72.4300],
        [-44.0112, -42.9943],
        [-23.7353, -42.9794]], grad_fn=<AddmmBackward>)

## Loss Function

Instead of defining a loss function manually, we can use the built-in loss function `mse_loss`.

In [43]:
# Import nn.functional
import torch.nn.functional as F

The `nn.functional` package contains many useful loss functions and several other utilities. 

In [44]:
# Define loss function
loss_fn = F.mse_loss

Let's compute the loss for the current predictions of our model.

In [45]:
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(17004.9395, grad_fn=<MseLossBackward>)


## Optimizer

Instead of manually manipulating the model's weights & biases using gradients, we can use the optimizer `optim.SGD`. SGD stands for `stochastic gradient descent`. It is called `stochastic` because samples are selected in batches (often with random shuffling) instead of as a single group.

In [46]:
# Define optimizer
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

Note that `model.parameters()` is passed as an argument to `optim.SGD`, so that the optimizer knows which matrices should be modified during the update step. Also, we can specify a learning rate which controls the amount by which the parameters are modified.

## Train the model

We are now ready to train the model. We'll follow the exact same process to implement gradient descent:

1. Generate predictions

2. Calculate the loss

3. Compute gradients w.r.t the weights and biases

4. Adjust the weights by subtracting a small quantity proportional to the gradient

5. Reset the gradients to zero

The only change is that we'll work with batches of data, instead of processing the entire training data in every iteration. Let's define a utility function `fit` which trains the model for a given number of epochs.

In [47]:
# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train `model` with mini-batch gradient descent.

    Args:
        num_epochs: Number of full passes to make over `train_dl`.
        model: Callable mapping a batch of inputs to predictions.
        loss_fn: Callable `(predictions, targets)` returning a scalar loss
            tensor that supports `.backward()` and `.item()`.
        opt: Optimizer exposing `step()` and `zero_grad()` for the model's
            parameters.
        train_dl: Iterable yielding `(inputs, targets)` batches; it is
            iterated once per epoch.

    Every 10th epoch the loss of that epoch's last batch is printed. If an
    epoch yields no batches there is nothing to report, so the log line is
    skipped instead of failing on an undefined loss.
    """
    # Repeat for given number of epochs
    for epoch in range(num_epochs):

        # Stays None when the loader yields no batches in this epoch
        loss = None

        # Train with batches of data
        for xb, yb in train_dl:

            # 1. Generate predictions
            pred = model(xb)

            # 2. Calculate loss
            loss = loss_fn(pred, yb)

            # 3. Compute gradients
            loss.backward()

            # 4. Update parameters using gradients
            opt.step()

            # 5. Reset the gradients to zero
            opt.zero_grad()

        # Print the progress (loss of the last batch seen in this epoch)
        if (epoch + 1) % 10 == 0 and loss is not None:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [48]:
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 127.8820
Epoch [20/100], Loss: 87.3328
Epoch [30/100], Loss: 68.6895
Epoch [40/100], Loss: 106.0618
Epoch [50/100], Loss: 47.3130
Epoch [60/100], Loss: 50.4639
Epoch [70/100], Loss: 26.2628
Epoch [80/100], Loss: 11.9688
Epoch [90/100], Loss: 16.2707
Epoch [100/100], Loss: 8.4678


Some things to note above:

* We use the data loader defined earlier to get batches of data for every iteration.

* Instead of updating parameters (weights and biases) manually, we use `opt.step` to perform the update, and `opt.zero_grad` to reset the gradients to zero.

* We've also added a log statement which prints the loss from the last batch of data for every 10th epoch, to track the progress of training. `loss.item` returns the actual value stored in the loss tensor.

Let's train the model for another 1000 epochs.

In [49]:
fit(1000, model, loss_fn, opt, train_dl)

Epoch [10/1000], Loss: 15.7812
Epoch [20/1000], Loss: 5.3975
Epoch [30/1000], Loss: 21.1533
Epoch [40/1000], Loss: 13.3486
Epoch [50/1000], Loss: 8.5648
Epoch [60/1000], Loss: 1.3637
Epoch [70/1000], Loss: 10.8330
Epoch [80/1000], Loss: 6.3020
Epoch [90/1000], Loss: 4.5924
Epoch [100/1000], Loss: 7.3259
Epoch [110/1000], Loss: 6.9167
Epoch [120/1000], Loss: 12.6976
Epoch [130/1000], Loss: 4.7627
Epoch [140/1000], Loss: 4.4613
Epoch [150/1000], Loss: 6.2067
Epoch [160/1000], Loss: 3.9803
Epoch [170/1000], Loss: 5.8236
Epoch [180/1000], Loss: 5.6693
Epoch [190/1000], Loss: 2.7416
Epoch [200/1000], Loss: 4.2112
Epoch [210/1000], Loss: 3.1465
Epoch [220/1000], Loss: 3.0218
Epoch [230/1000], Loss: 1.1256
Epoch [240/1000], Loss: 2.1762
Epoch [250/1000], Loss: 2.1117
Epoch [260/1000], Loss: 1.1763
Epoch [270/1000], Loss: 2.5358
Epoch [280/1000], Loss: 2.2734
Epoch [290/1000], Loss: 1.9285
Epoch [300/1000], Loss: 3.5903
Epoch [310/1000], Loss: 1.8371
Epoch [320/1000], Loss: 1.9195
Epoch [330/1

Let's generate predictions using our model and verify that they're close to our targets.

In [50]:
# Generate predictions
preds = model(inputs)
preds

tensor([[ 57.1684,  70.3139],
        [ 82.1393, 100.6602],
        [119.1349, 132.9487],
        [ 21.2357,  37.0097],
        [101.6152, 119.1361],
        [ 57.1684,  70.3139],
        [ 82.1393, 100.6602],
        [119.1349, 132.9487],
        [ 21.2357,  37.0097],
        [101.6152, 119.1361],
        [ 57.1684,  70.3139],
        [ 82.1393, 100.6602],
        [119.1349, 132.9487],
        [ 21.2357,  37.0097],
        [101.6152, 119.1361]], grad_fn=<AddmmBackward>)

In [51]:
# Compare with targets
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

Indeed, the predictions are quite close to our targets, and now we have a fairly good model to predict crop yields for apples and oranges by looking at the average temperature, rainfall and humidity in a region.

## Commit and update the notebook

As a final step, we can record a new version of the notebook using the `jovian` library.

In [52]:
import jovian

<IPython.core.display.Javascript object>

In [53]:
jovian.commit(project='linear-regression-pytorch')

<IPython.core.display.Javascript object>

[jovian] Attempting to save notebook..[0m
[jovian] Detected Kaggle notebook...[0m
[jovian] Please enter your API key ( from https://jovian.ml/ ):[0m
API KEY: ········
[jovian] Uploading notebook to https://jovian.ml/satnam00/linear-regression-pytorch[0m


<IPython.core.display.Javascript object>