In [1]:
import torch
import numpy as np

In [2]:
def model(x):
    return x @ w.t() + b

In [3]:
# MSE loss
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [4]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [5]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In [6]:
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)

`torch.randn` creates a tensor with the given shape, with elements picked randomly from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution) with mean 0 and standard deviation 1.


In [7]:
# Weights and biases
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-1.3695,  1.7332, -0.0315],
        [ 0.0278, -0.2094,  0.6012]], requires_grad=True)
tensor([ 0.7105, -2.2003], requires_grad=True)


@ represents matrix multiplication in PyTorch, and the .t method returns the transpose of a tensor.

The matrix obtained by passing the input data into the model is a set of predictions for the target variables.

Defining the ML model

In [8]:
prediction = model(inputs)
prediction

tensor([[ 15.5045,  11.6541],
        [ 26.5881,  20.3830],
        [111.9807,   7.0331],
        [-65.6171,  13.8796],
        [ 70.3926,  21.7025]], grad_fn=<AddBackward0>)

Comparing with the true (given) values:

In [9]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

## Loss function

Before we improve our model, we need a way to evaluate how well our model is performing. We can compare the model's predictions with the actual targets using the following method:

* Calculate the difference between the two matrices (`preds` and `targets`).
* Square all elements of the difference matrix to remove negative values.
* Calculate the average of the elements in the resulting matrix.

The result is a single number, known as the **mean squared error** (MSE).

You can see a big difference between our model's predictions and the actual targets because we've initialized our model with random weights and biases. Obviously, we can't expect a randomly initialized model to just work.

In [10]:
# Compute loss
loss = mse(prediction, targets)
print(loss)

tensor(4916.2158, grad_fn=<DivBackward0>)


In [11]:
# Compute gradients
loss.backward()

In [12]:
# Gradients for weights
print(w)
print(w.grad)

tensor([[-1.3695,  1.7332, -0.0315],
        [ 0.0278, -0.2094,  0.6012]], requires_grad=True)
tensor([[-3941.0359, -3067.9736, -2231.0266],
        [-6325.2656, -7643.5542, -4528.1445]])


In [31]:
# Adjust weights & reset gradients
for i in range(5000):
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [32]:
print(w)
print(b)

tensor([[-0.3328,  1.0205,  0.2934],
        [-0.2696,  0.7813,  0.9218]], requires_grad=True)
tensor([ 0.7207, -2.2007], requires_grad=True)


In [33]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(9.5571, grad_fn=<DivBackward0>)


In [16]:
# Train for 100 epochs
while loss < 50:
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [17]:
print(w)
print(b)

tensor([[-1.3301,  1.7638, -0.0092],
        [ 0.0911, -0.1329,  0.6465]], requires_grad=True)
tensor([ 0.7110, -2.1996], requires_grad=True)


In [34]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(9.5571, grad_fn=<DivBackward0>)


In [35]:
preds

tensor([[ 57.4182,  70.1018],
        [ 79.0201, 101.0136],
        [125.5364, 132.5036],
        [ 21.5141,  37.9995],
        [ 96.2659, 118.7268]], grad_fn=<AddBackward0>)

In [36]:
# Train for 100 epochs
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [37]:
print(w)
print(b)

tensor([[-0.3314,  0.9983,  0.3273],
        [-0.2769,  0.7875,  0.9226]], requires_grad=True)
tensor([ 0.7209, -2.2007], requires_grad=True)


In [38]:
preds

tensor([[ 57.4891,  70.0270],
        [ 79.3586, 100.9569],
        [124.6524, 132.7529],
        [ 21.9550,  37.5642],
        [ 96.5976, 118.8822]], grad_fn=<AddBackward0>)

In [39]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(7.8102, grad_fn=<DivBackward0>)


In [40]:
# Train for 100 epochs
for i in range(5000):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-6
        b -= b.grad * 1e-6
        w.grad.zero_()
        b.grad.zero_()

In [41]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(3.3368, grad_fn=<DivBackward0>)


In [42]:
preds

tensor([[ 57.4812,  69.9450],
        [ 80.4469, 100.8977],
        [122.1902, 133.0198],
        [ 22.0185,  37.0866],
        [ 98.4540, 119.0580]], grad_fn=<AddBackward0>)

In [43]:
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])