In [14]:
import numpy as np
import torch

In [15]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')

In [16]:
# Targets (apples, oranges)
targets = np.array([[56, 70],
                    [81, 101],
                    [119, 133],
                    [22, 37],
                    [103, 119]], dtype='float32')

In [17]:
# Convert inputs and targets to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [18]:
# Weights and biases
w = torch.randn(2, 3, requires_grad=True)        #this means 3 input units and 2 output units
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.6222, -0.1289, -0.2040],
        [ 1.8580,  0.7186, -0.8996]], requires_grad=True)
tensor([0.2805, 0.5616], requires_grad=True)


In [19]:
def model(x):
    return x @ w.t() + b


In [20]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[ 28.2862, 145.6617],
        [ 32.4930, 175.3057],
        [ 25.2969, 206.3257],
        [ 50.6472, 187.6960],
        [ 16.5502, 134.7803]], grad_fn=<AddBackward0>)


In [21]:
# Compare with targets
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [22]:
# MSE loss
def mse(t1, t2):
    diff = t1 - t2
    return torch.sum(diff * diff) / diff.numel()

In [23]:
# Compute loss
loss = mse(preds, targets)
print(loss)

tensor(5977.6475, grad_fn=<DivBackward0>)


In [24]:
# Compute gradients
loss.backward() #compute the gradient or derivative of the loss w.r.t. to the weights and biases

In [25]:
# Gradients for weights
print(w)
print(w.grad)

tensor([[ 0.6222, -0.1289, -0.2040],
        [ 1.8580,  0.7186, -0.8996]], requires_grad=True)
tensor([[-3526.4873, -5149.8027, -2944.4927],
        [ 7024.8579,  5885.7437,  3788.4565]])


## Adjusting weight and bias to reduce the loss

In [26]:
w
w.grad

tensor([[-3526.4873, -5149.8027, -2944.4927],
        [ 7024.8579,  5885.7437,  3788.4565]])

In [27]:
# we don't modify the weights by a very large amount.
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
#This number is called the learning rate of the algorithm.

In [28]:
# Let's verify that the loss is actually lower
loss = mse(preds, targets)
print(loss)

tensor(5977.6475, grad_fn=<DivBackward0>)


In [29]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


## Training


In [30]:
# Generate predictions
preds = model(inputs)
print(preds)

tensor([[ 35.5774, 134.9603],
        [ 42.1189, 161.3082],
        [ 36.9739, 190.1291],
        [ 57.5486, 176.5972],
        [ 25.9888, 121.6301]], grad_fn=<AddBackward0>)


In [31]:
# Calculate the loss
loss = mse(preds, targets)
print(loss)

tensor(4646.6489, grad_fn=<DivBackward0>)


In [32]:
# Compute gradients
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-2770.6221, -4329.1655, -2439.9111],
        [ 5924.1567,  4713.9888,  3063.1436]])
tensor([-36.5585,  64.9250])


In [33]:
# Adjust weights & reset gradients
with torch.no_grad():
    w -= w.grad * 1e-5
    b -= b.grad * 1e-5
    w.grad.zero_()
    b.grad.zero_()

In [34]:
print(w)
print(b)

tensor([[ 0.6851, -0.0342, -0.1502],
        [ 1.7285,  0.6126, -0.9681]], requires_grad=True)
tensor([0.2813, 0.5601], requires_grad=True)


In [35]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

#we can observe that loss has reduced

tensor(3742.6836, grad_fn=<DivBackward0>)


## Training on multiple epoch

In [36]:
# Train for 100 epochs
for i in range(1000):
    preds = model(inputs)
    loss = mse(preds, targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()

In [37]:
# Calculate loss
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(35.0655, grad_fn=<DivBackward0>)


In [38]:
# Predictions
preds

tensor([[ 57.4486,  71.0908],
        [ 80.3699,  95.0514],
        [122.4160, 144.4824],
        [ 22.2560,  40.4023],
        [ 98.0938, 107.4954]], grad_fn=<AddBackward0>)

In [39]:
# Targets
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])