# Linear Regression with PyTorch

In [2]:
import numpy as np
import torch

# Prepare data

In [8]:
# Input (temp, rainfall, humidity)
inputs = np.array([[73, 67, 43], 
                   [91, 88, 64], 
                   [87, 134, 58], 
                   [102, 43, 37], 
                   [69, 96, 70]], dtype='float32')

In [9]:
# Targets (apples, oranges)
targets = np.array([[56, 70], 
                    [81, 101], 
                    [119, 133], 
                    [22, 37], 
                    [103, 119]], dtype='float32')

In real project, input row data (CSV) as numpy array and then covert to tensor

In [13]:
# convert to tensors
inputs_tensor = torch.from_numpy(inputs)
targets_tensor = torch.from_numpy(targets)
inputs_tensor,targets_tensor

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.],
         [102.,  43.,  37.],
         [ 69.,  96.,  70.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))

Linear Regession Model

In [14]:
# init weights and biases
# torch.randn creates a tensor with the given shape,
# with elements picked randomly from a normal distribution
# with mean 0 and standard deviation 1.
w = torch.randn(2,3,requires_grad=True)
b = torch.randn(2,requires_grad=True)
w, b

(tensor([[-0.2973, -0.3848,  0.8391],
         [-0.8700,  1.0500,  0.9071]], requires_grad=True),
 tensor([-0.4208, -0.4247], requires_grad=True))

In [15]:
# define linear model
def model(x):
  return x @ w.t() + b
# @ represents matrix multiplication in PyTorch

In [40]:
# show predict
preds = model(inputs_tensor)
preds,targets_tensor

(tensor([[-11.8285,  45.4184],
         [ -7.6410,  70.8569],
         [-29.1889, 117.1927],
         [-16.2489, -10.4536],
         [  0.8556, 103.8394]], grad_fn=<AddBackward0>), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))

# Loss function

In [41]:
# MSE Loss
def mse(pred, truth):
  diff = pred - truth
  return torch.sum(diff * diff) / diff.numel()
# .numel returns the number of elements in a tensor

In [42]:
# Compute Loss
loss = mse(preds,targets_tensor)
loss

tensor(5055.8750, grad_fn=<DivBackward0>)

# Compute gradients

In [43]:
# Compute gradients
loss.backward()

In [44]:
# gradients for weights
w, w.grad

(tensor([[-0.2973, -0.3848,  0.8391],
         [-0.8700,  1.0500,  0.9071]], requires_grad=True),
 tensor([[-7371.9189, -8730.5586, -5149.9844],
         [-2359.8115, -1982.7313, -1344.0027]]))

In [45]:
# !!!IMPORTANT!!!
# before update next grad. we need to calling .zero()
# cause PyTorch accuumlates gradients
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


# Adjust weights and biases using GD

In [46]:
# 1.Generate predictions
# 2.Calculate the loss
# 3.Compute gradients w.r.t the weights and biases
# 4.Adjust the weights by subtracting a small quantity proportional to the gradient
# 5.Reset the gradients to zero

# 1
preds = model(inputs_tensor)
# 2
loss = mse(preds,targets_tensor)
# 3
loss.backward()
# 4 & 5
with torch.no_grad():
  w -= w.grad * 1e-4
  b -= b.grad * 1e-4
  w.grad.zero_()
  b.grad.zero_()


torch.no_grad(*args, **kwargs)
Context-manager that disabled gradient calculation.

Disabling gradient calculation is useful for inference, when you are sure that you will not call :meth:Tensor.backward(). It will reduce memory consumption for computations that would otherwise have requires_grad=True.

In this mode, the result of every computation will have requires_grad=False, even when the inputs have requires_grad=True.

# Training

In [47]:
epochs = 100
lr = 1e-4

In [48]:
for e in range(epochs):
  preds = model(inputs_tensor)
  loss = mse(preds,targets_tensor)
  loss.backward()
  with torch.no_grad():
    w -= w.grad * lr
    b -= b.grad * lr
    w.grad.zero_()
    b.grad.zero_()
  print(e,loss)

0 tensor(3256.8401, grad_fn=<DivBackward0>)
1 tensor(2120.4082, grad_fn=<DivBackward0>)
2 tensor(1399.8331, grad_fn=<DivBackward0>)
3 tensor(940.5876, grad_fn=<DivBackward0>)
4 tensor(645.8569, grad_fn=<DivBackward0>)
5 tensor(454.9477, grad_fn=<DivBackward0>)
6 tensor(329.7801, grad_fn=<DivBackward0>)
7 tensor(246.4348, grad_fn=<DivBackward0>)
8 tensor(189.8629, grad_fn=<DivBackward0>)
9 tensor(150.5752, grad_fn=<DivBackward0>)
10 tensor(122.5698, grad_fn=<DivBackward0>)
11 tensor(102.0347, grad_fn=<DivBackward0>)
12 tensor(86.5345, grad_fn=<DivBackward0>)
13 tensor(74.5017, grad_fn=<DivBackward0>)
14 tensor(64.9165, grad_fn=<DivBackward0>)
15 tensor(57.1073, grad_fn=<DivBackward0>)
16 tensor(50.6239, grad_fn=<DivBackward0>)
17 tensor(45.1584, grad_fn=<DivBackward0>)
18 tensor(40.4953, grad_fn=<DivBackward0>)
19 tensor(36.4795, grad_fn=<DivBackward0>)
20 tensor(32.9963, grad_fn=<DivBackward0>)
21 tensor(29.9586, grad_fn=<DivBackward0>)
22 tensor(27.2980, grad_fn=<DivBackward0>)
23 ten

In [50]:
preds,targets_tensor

(tensor([[ 56.9277,  70.2232],
         [ 83.7411, 100.7819],
         [115.5842, 132.8410],
         [ 20.2831,  36.9423],
         [104.9720, 119.2919]], grad_fn=<AddBackward0>), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.],
         [ 22.,  37.],
         [103., 119.]]))