In [1]:
import matplotlib.pyplot as plt

In [2]:
import torch
import numpy as np

## Creating inputs and outputs

Taking 5 rows of inputs having 3 features, outputs having 2 variables.
So, inputs are of shape 5x3 and outputs of shape 5x2.

In [20]:
inputs = np.array([[2, 50, -20],
              [10, 230, -110],
              [5, 120, -48],
              [7, 180, -80],
              [12, 300, -140]
             ], dtype='float32')

print(inputs)

[[   2.   50.  -20.]
 [  10.  230. -110.]
 [   5.  120.  -48.]
 [   7.  180.  -80.]
 [  12.  300. -140.]]


In [21]:
targets = np.array([[100, 300],
              [900, 2800],
              [430, 1200],
              [650, 1700],
              [1320, 4000]
             ], dtype='float32')

print(targets)

[[ 100.  300.]
 [ 900. 2800.]
 [ 430. 1200.]
 [ 650. 1700.]
 [1320. 4000.]]


### Converting into torch.tensor objects

In [22]:
X = torch.from_numpy(inputs)
Y = torch.from_numpy(targets)

## Creating model

### Initializing weights and bias

In [23]:
w = torch.randn(Y.shape[1],X.shape[1], requires_grad=True)
b = torch.randn(Y.shape[1], requires_grad=True)

In [24]:
print(w,'\n')
print(b)

tensor([[ 2.3669, -0.2281, -0.5640],
        [-1.9582,  0.7312, -1.5440]], requires_grad=True) 

tensor([-1.0605,  0.4080], requires_grad=True)


### Defining model

In [25]:
def model(X, w, b):
    return(X @ w.t() + b)

In [26]:
## Initial values

Y_pred = model(X, w, b)

print("Target values :\n",Y,'\n')
print("Predicted values :\n",Y_pred,'\n')

Target values :
 tensor([[ 100.,  300.],
        [ 900., 2800.],
        [ 430., 1200.],
        [ 650., 1700.],
        [1320., 4000.]]) 

Predicted values :
 tensor([[  3.5476,  63.9338],
        [ 32.1826, 318.8528],
        [ 10.4722, 152.4782],
        [ 19.5673, 241.8448],
        [ 37.8684, 412.4435]], grad_fn=<AddBackward0>) 



## Loss function

In [10]:
def mse(Y_pred, Y):
    diff = Y_pred - Y
    return(torch.sum(diff**2)/diff.numel())

In [11]:
loss = mse(Y_pred, Y)
print(loss)

tensor(3405528.7500, grad_fn=<DivBackward0>)


## Gradient Descent

### Computing gradients

In [12]:
loss.backward()

In [13]:
print(w.grad)
print(b.grad)

tensor([[  -7243.1382, -177195.7969,   81777.7422],
        [ -19461.8242, -475535.6875,  219845.7031]])
tensor([ -778.4398, -2062.6023])


### Adjusting weights and biases as per gradients

In [14]:
lr = 0.00001

In [15]:
with torch.no_grad():
    w -= w.grad * lr
    b -= b.grad * lr
    
    # Setting gradients to zero
    
    w.grad.zero_()
    b.grad.zero_()

### Check new loss

In [16]:
Y_pred = model(X, w, b)
loss_new = mse(Y_pred, Y)
print(loss)
print(loss_new)

tensor(3405528.7500, grad_fn=<DivBackward0>)
tensor(1005705., grad_fn=<DivBackward0>)


### Training for multiple epochs

In [17]:
epochs = 10000

In [18]:
for i in range(epochs):
    Y_pred = model(X, w, b)
    loss = mse(Y_pred, Y)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * lr
        b -= b.grad * lr
        w.grad.zero_()
        b.grad.zero_()

In [19]:
Y_pred = model(X, w, b)
loss = mse(Y_pred, Y)

print("Target values :\n",Y,'\n')
print("Predicted values :\n",Y_pred,'\n')
print("Loss = ", loss)

Target values :
 tensor([[ 100.,  300.],
        [ 900., 2800.],
        [ 430., 1200.],
        [ 650., 1700.],
        [1320., 4000.]]) 

Predicted values :
 tensor([[ 174.3331,  466.5924],
        [ 965.5428, 2940.3340],
        [ 420.8174, 1130.4347],
        [ 702.3077, 2040.3105],
        [1229.9076, 3689.5215]], grad_fn=<AddBackward0>) 

Loss =  tensor(28525.2461, grad_fn=<DivBackward0>)
