In [1]:
import torch

We'll create
a model that predicts crop yields for apples and oranges (target variables) by looking at the average temperature,
rainfall, and humidity (input variables or features) in a region



# Training

In [2]:
import numpy as np
import torch

# Training Data
The training data can be represented using 2 matrices: `inputs` and `targets`, each with one row per observation, and one column per veriable

In [3]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43],
                    [91, 88, 64],
                    [87, 134, 58],
                    [102, 43, 37],
                    [69, 96, 70]], dtype='float32')

In [4]:
# Targets (apples, oranges)

targets = np.array([[56, 70],
                   [81, 101],
                   [119, 133],
                   [22, 37],
                   [103, 119]], dtype = 'float32')

In [5]:
inputs

array([[ 73.,  67.,  43.],
       [ 91.,  88.,  64.],
       [ 87., 134.,  58.],
       [102.,  43.,  37.],
       [ 69.,  96.,  70.]], dtype=float32)

In [7]:
targets

array([[ 56.,  70.],
       [ 81., 101.],
       [119., 133.],
       [ 22.,  37.],
       [103., 119.]], dtype=float32)

In [8]:
# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Linear regression model fron scratch

The weights and the biases can also be represented as matrices initialized as random variables
we will start with random weights
- for the weights, we create  2 rows , 3 columns as seen so that they could be multiplied with the respective inputs
- we will also create random bias

In [38]:
# Weights abd biases
w = torch.randn(2, 3, requires_grad = True)
b = torch.randn(2, requires_grad = True)

print(w)
print(b)

tensor([[ 0.3331,  1.3555,  0.9108],
        [-1.3847,  0.5854, -0.9380]], requires_grad=True)
tensor([ 1.6694, -0.6855], requires_grad=True)


torch.randn creates a tensor with the given shape, with elements picked randomly from a normal distribution
with mean 0 and standard deviation 1.
Our model is simply a function that performs a matrix multiplication of the inputs and the weights w
(transposed) and adds the bias b (replicated for each observation)

We can define the model as follows

In [39]:
def model(x):
    return x @ w.t() + b

@ represents matrix multiplication in PyTorch, and the .t method returns the transpose of a tensor.

In [40]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[ 155.9711, -102.8800],
        [ 209.5597, -135.2092],
        [ 265.1173,  -97.1119],
        [ 127.6338, -151.4588],
        [ 218.5402, -105.6904]], grad_fn=<AddBackward0>)


In [41]:
# Compare with targets
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Loss Function
- Calculate the difference between the two matrices (preds and targets).
- Square all elements of the difference matrix to remove negative values.
- Calculate the average of the elements in the resulting matrix.
- The result is a single number, known as the *mean squared error* (MSE).

In [42]:
# Calculate the differences btw two matrices
diff = preds - targets
# Square and find the average to get the MSE
MSE = torch.sum(diff * diff)/diff.numel()
print(diff.numel())
print(MSE)

10
tensor(29701.6367, grad_fn=<DivBackward0>)


In [43]:
# MSE loss
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff)/diff.numel()

- The `torch.sum` returns the sum of all the elements in a tensor
- The `.numel` method returns the number of elements in the tensor

In [44]:
# Compute the loss
loss = mse(preds, targets)
print(loss)

tensor(29701.6367, grad_fn=<DivBackward0>)


# Compute gradients

with pytorch, we can automatically compute the gradients or derivative of the loss wrt to the weights and biasis, because they have `request_grad` set to `True`

In [45]:
# compute gradient
loss.backward()

In [46]:
# Gradients for weights
print(w)
print(w.grad)

tensor([[ 0.3331,  1.3555,  0.9108],
        [-1.3847,  0.5854, -0.9380]], requires_grad=True)
tensor([[ 10091.1895,  10645.0312,   6599.5298],
        [-17772.2891, -18575.6758, -11719.8047]])


# Adjust weights and biases to reduce the loss

In [53]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


# Adjust the weights and biases using gradient descent
As seen above, we reduce the loss and improve our model using the gradient descent optimization algorithm.
Thus, we can train the model using the following steps:
1. Generate predictions
2. Calculate the loss
3. Compute gradients w.r.t the weights and biases
4. Adjust the weights by subtracting a small quantity proportional to the gradient
5. Reset the gradients to zero

In [54]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[ 155.9711, -102.8800],
        [ 209.5597, -135.2092],
        [ 265.1173,  -97.1119],
        [ 127.6338, -151.4588],
        [ 218.5402, -105.6904]], grad_fn=<AddBackward0>)


In [55]:
# Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(29701.6367, grad_fn=<DivBackward0>)


In [56]:
# Compute gradients
loss.backward()
print(w.grad)
print(b.grad)

tensor([[ 10091.1895,  10645.0312,   6599.5298],
        [-17772.2891, -18575.6758, -11719.8047]])
tensor([ 119.1644, -210.4701])


Finally we update the weights and biases using the gradients computed

In [61]:
# Adjust weights and biases

with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    
    w.grad.zero_()
    b.grad.zero_()

In [63]:
# Lets take a look at the weights and the biases
print(w)
print(b)

tensor([[ 0.2322,  1.2491,  0.8448],
        [-1.2070,  0.7712, -0.8208]], requires_grad=True)
tensor([ 1.6682, -0.6834], requires_grad=True)


# We go again

In [64]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[ 138.6333,  -72.4189],
        [ 186.7842,  -95.1871],
        [ 238.2447,  -49.9590],
        [ 110.3204, -121.0050],
        [ 196.7372,  -67.3889]], grad_fn=<AddBackward0>)


In [65]:
# Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(20077.7930, grad_fn=<DivBackward0>)


# Train for multiple epochs

In [68]:
# Train for 100 epochs

for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        
        w.grad.zero_()
        b.grad.zero_()

Let's verify the answer

In [69]:
# Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(103.9035, grad_fn=<DivBackward0>)


In [70]:
# predictions
preds

tensor([[ 58.4268,  68.9596],
        [ 81.9021,  90.4754],
        [117.3632, 158.3120],
        [ 26.6820,  28.8905],
        [ 98.4497, 106.0109]], grad_fn=<AddBackward0>)

In [71]:
# Targets
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

# Linear regression with PyTorch built-ins

In [72]:
import torch.nn as nn

In [79]:
# Inputs (temp, rainfall, humidity)

inputs = np.array([[73, 67, 43], [91, 88, 64],
                    [87, 134, 58], [102, 43, 37],
                    [69, 96, 70], [74, 66, 43],
                    [91, 87, 65], [88, 134, 59],
                    [101, 44, 37], [68, 96, 71],
                    [73, 66, 44], [92, 87, 64], 
                    [87, 135, 57], [103, 43, 36], [68, 97, 70]], dtype='float32')


# Targets (apples, oranges)
targets = np.array([[56, 70], [81, 101], [119, 133],
                    [22, 37],  [103, 119], [57, 69], 
                    [80, 102], [118, 132], [21, 38], 
                    [104, 118], [57, 69], [82, 100], 
                    [118, 134], [20, 38], [102, 120]], dtype='float32')

inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

In [80]:
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 74.,  66.,  43.],
        [ 91.,  87.,  65.],
        [ 88., 134.,  59.],
        [101.,  44.,  37.],
        [ 68.,  96.,  71.],
        [ 73.,  66.,  44.],
        [ 92.,  87.,  64.],
        [ 87., 135.,  57.],
        [103.,  43.,  36.],
        [ 68.,  97.,  70.]])

In [81]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])

Here we are using 15 elements

# Dataset and DataLoader

We'll create a TensorDataset , which allows access to rows from inputs and targets as tuples, and
provides standard APIs for working with many different types of datasets in PyTorch.

In [85]:
from torch.utils.data import TensorDataset

In [87]:
# Define dataset
train_ds = TensorDataset(inputs, targets)
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [88]:
from torch.utils.data import DataLoader

In [96]:
# Define a data loader
batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle = True)

The data loader is typically used in a `for-in` loop. Let's take a look at the example

In [97]:
for xb, yb in train_dl:
    print(xb)
    print(yb)
    break

tensor([[ 92.,  87.,  64.],
        [101.,  44.,  37.],
        [ 68.,  96.,  71.],
        [ 74.,  66.,  43.],
        [ 73.,  67.,  43.]])
tensor([[ 82., 100.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 56.,  70.]])


# nn.Linear
Instead of initializing the weights & biases manually, we can define the model using the `nn.Linear` class from PyTorch, which does it automatically

In [100]:
# Define the model
model = nn.Linear(3, 2) # The (3,2) means three input and 2 target variables
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.1525,  0.3269, -0.3252],
        [-0.2350, -0.0743,  0.1175]], requires_grad=True)
Parameter containing:
tensor([-0.2382,  0.1200], requires_grad=True)


PyTorch models also have a helpful .parameters method, which returns a list containing all the weights and
bias matrices present in the model. For our linear regression model, we have one weight matrix and one bias
matrix.

In [103]:
# To check for the parameters
list(model.parameters())

[Parameter containing:
 tensor([[-0.1525,  0.3269, -0.3252],
         [-0.2350, -0.0743,  0.1175]], requires_grad=True),
 Parameter containing:
 tensor([-0.2382,  0.1200], requires_grad=True)]

In [104]:
preds = model(inputs)
print(preds)

tensor([[ -3.4493, -16.9638],
        [ -6.1574, -20.2879],
        [ 11.4421, -23.4714],
        [-13.7672, -22.6997],
        [ -2.1377, -15.0077],
        [ -3.9287, -17.1244],
        [ -6.8094, -20.0961],
        [ 10.9644, -23.5890],
        [-13.2878, -22.5391],
        [ -2.3104, -14.6553],
        [ -4.1013, -16.7720],
        [ -6.6368, -20.4486],
        [ 12.0942, -23.6632],
        [-13.5945, -23.0522],
        [ -1.6583, -14.8471]], grad_fn=<AddmmBackward0>)


# Loss Function

In [106]:
# Import nn.functional
import torch.nn.functional as F

In [109]:
# Define the loss function
loss_fn = F.mse_loss

In [113]:
loss = loss_fn(model(inputs), targets)
loss

tensor(10320.0674, grad_fn=<MseLossBackward0>)

# Optimizer