In [1]:
import torch
import numpy as np

## Training Data

In [2]:
# Feature matrix: one row per sample; columns are temperature, rainfall, humidity
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

In [3]:
# Target matrix: one row per sample; columns are apples, oranges
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

In [4]:
# Convert the NumPy arrays to tensors. torch.as_tensor shares memory with a
# NumPy array just like torch.from_numpy does, but it also accepts an existing
# tensor, so re-running this cell is a no-op instead of raising a TypeError
# (the names are rebound from ndarray to tensor here).
inputs = torch.as_tensor(inputs)
targets = torch.as_tensor(targets)

## Linear Regression from scratch

In [5]:
# Seed the global RNG so the random initialisation below (and every number
# derived from it) is reproducible after Restart Kernel -> Run All.
torch.manual_seed(42)

# Weights: one row per output column (2), one column per input feature (3)
w = torch.randn(2, 3, requires_grad=True)
# Biases: one per output column
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.5071, -0.4351,  0.6368],
        [-0.1208, -1.1941, -0.0266]], requires_grad=True)
tensor([-0.0269,  0.8429], requires_grad=True)


In [6]:
def model(x):
    """Apply the linear model to a batch of inputs.

    Reads the notebook-level weight matrix `w` and bias vector `b`.

    Args:
        x: tensor whose last dimension matches the number of columns of `w`.

    Returns:
        Tensor computed as `x @ w.t() + b`, one row of predictions per row of `x`.
    """
    weighted = x.matmul(w.t())
    return weighted + b

In [7]:
# Forward pass: run the model on all training inputs to get predictions
preds = model(inputs)
print(preds)

tensor([[  35.2182,  -89.1195],
        [  48.5808, -116.9276],
        [  22.7152, -171.2118],
        [  56.5457,  -63.8042],
        [  37.7650, -123.9832]], grad_fn=<AddBackward0>)


In [8]:
# Print the ground-truth targets to compare by eye with the predictions
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


## Loss function

In [9]:
# Element-wise error of every prediction
diff = preds - targets
print(diff)
# `*` on tensors is element-wise, so this squares each error (not a matrix product)
print(diff * diff)
# Sum of squared errors, then the number of elements in diff
print((diff * diff).sum())
print(diff.numel())

tensor([[ -20.7818, -159.1195],
        [ -32.4192, -217.9276],
        [ -96.2848, -304.2118],
        [  34.5457, -100.8042],
        [ -65.2350, -242.9832]], grad_fn=<SubBackward0>)
tensor([[  431.8817, 25319.0215],
        [ 1051.0046, 47492.4453],
        [ 9270.7598, 92544.8125],
        [ 1193.4086, 10161.4805],
        [ 4255.6035, 59040.8438]], grad_fn=<MulBackward0>)
tensor(250761.2656, grad_fn=<SumBackward0>)
10


In [10]:
# MSE Loss
def mse(t1, t2):
    """Mean squared error between two tensors.

    Args:
        t1: prediction tensor.
        t2: target tensor; `t1 - t2` must be a valid (broadcastable) subtraction.

    Returns:
        Scalar tensor: the sum of squared element-wise differences divided by
        the number of elements in the difference.
    """
    err = t1 - t2
    squared_total = (err * err).sum()
    return squared_total / err.numel()

In [11]:
# Compute the mean squared error of the current predictions against the targets
loss = mse(preds, targets)
print(loss)

tensor(8420.7422, grad_fn=<DivBackward0>)


### Compute gradients

In [14]:
# PyTorch's autograd computes the gradients for us: call .backward() on the loss

In [16]:
# Backpropagate from the loss; this populates w.grad and b.grad
loss.backward()

In [18]:
# Weights, followed by the gradient of the loss w.r.t. the weights
print(w)
print(w.grad)

tensor([[-0.8944, -0.4684, -1.5831],
        [ 0.5632, -0.6157,  0.3625]], requires_grad=True)
tensor([[-23193.6074, -25514.1836, -15764.5430],
        [ -6185.3369,  -8140.5293,  -4708.6377]])


In [19]:
# Biases, followed by the gradient of the loss w.r.t. the biases
print(b)
print(b.grad)

tensor([0.3749, 0.3814], requires_grad=True)
tensor([-277.5257,  -77.0642])


In [20]:
# Reset both gradients in place, then print them to confirm they are all zeros
for param in (w, b):
    param.grad.zero_()
for param in (w, b):
    print(param.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


## Adjust weights and biases using gradient descent

1. Generate predictions
2. Calculate the loss
3. Compute the gradients w.r.t. the weights and biases
4. Adjust the weights and biases by subtracting a small quantity proportional to the gradient
5. Reset the gradients to zero

In [11]:
# Step 1: forward pass to generate predictions with the current w and b
preds = model(inputs)
print(preds)

tensor([[  35.2182,  -89.1195],
        [  48.5808, -116.9276],
        [  22.7152, -171.2118],
        [  56.5457,  -63.8042],
        [  37.7650, -123.9832]], grad_fn=<AddBackward0>)


In [12]:
# Step 2: calculate the mean squared error of those predictions
loss = mse(preds, targets)
print(loss)

tensor(25076.1270, grad_fn=<DivBackward0>)


In [13]:
# Step 3: backward pass to compute the gradients of the loss w.r.t. w and b,
# then print them
loss.backward()
print(w.grad)
print(b.grad)

tensor([[ -2764.3081,  -4384.9043,  -2368.2437],
        [-16992.2852, -19652.7969, -11834.4746]])
tensor([ -36.0350, -205.0093])


In [14]:
# Steps 4 and 5: adjust each parameter by subtracting a small quantity
# proportional to its gradient, then reset that gradient to zero.
with torch.no_grad():  # the in-place update must not be tracked by autograd
    for param in (w, b):
        param -= param.grad * 1e-5  # 1e-5 is the step size (learning rate)
        param.grad.zero_()

In [15]:
# Weights and biases after one gradient-descent step
print(w)
print(b)

tensor([[ 0.5347, -0.3913,  0.6605],
        [ 0.0492, -0.9975,  0.0917]], requires_grad=True)
tensor([-0.0265,  0.8449], requires_grad=True)


In [16]:
# Recompute predictions and loss with the updated parameters to see the effect
# of the step
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(17358.0215, grad_fn=<DivBackward0>)


## Train for multiple epochs

In [26]:
# Hyperparameters for full-batch gradient descent, named so they can be tuned
# in one place instead of being buried in the loop as magic numbers.
NUM_EPOCHS = 100
LEARNING_RATE = 1e-5

for epoch in range(NUM_EPOCHS):
    # Forward pass and loss over the whole dataset
    preds = model(inputs)
    loss = mse(preds, targets)
    # Backward pass populates w.grad and b.grad
    loss.backward()
    # Update the parameters without autograd tracking, then reset the
    # gradients so the next backward() starts from zero
    with torch.no_grad():
        w -= w.grad * LEARNING_RATE
        b -= b.grad * LEARNING_RATE
        w.grad.zero_()
        b.grad.zero_()

In [27]:
# Loss after training: recompute predictions with the trained w and b
preds = model(inputs)
loss = mse(preds, targets)
print(loss)

tensor(34.4762, grad_fn=<DivBackward0>)


In [28]:
# Display the predictions made with the trained parameters
preds

tensor([[ 58.8022,  72.3187],
        [ 84.6942, 101.0398],
        [110.4042, 128.8897],
        [ 30.7669,  47.7718],
        [100.5891, 113.6709]], grad_fn=<AddBackward0>)

In [29]:
# Display the targets for a side-by-side comparison with the predictions
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])