# Linear Regression

### training data

In [1]:
import numpy as np
import pandas as pd 
import torch 
import math

In [2]:
# Input features, one row per training sample: (temp, rainfall, humidity).
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)

In [3]:
# Target values, one row per training sample: (apples, oranges).
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

In [4]:
# Convert the NumPy arrays to torch tensors:
# tens1 holds the input features, tens2 holds the targets.
tens1, tens2 = torch.tensor(inputs), torch.tensor(targets)

In [5]:
# Weights and biases
# Seed PyTorch's global RNG so that a fresh "Restart Kernel -> Run All" always
# starts from the same random parameters (the seed value itself is arbitrary).
torch.manual_seed(42)
# w has one row of three input weights per target column and b has one bias per
# target column. requires_grad=True makes autograd track operations on them so
# that loss.backward() can populate w.grad and b.grad.
w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[ 0.1548,  1.0396,  1.8693],
        [-0.9566, -0.1036, -0.3296]], requires_grad=True)
tensor([0.2634, 1.3578], requires_grad=True)


##### We can define the model as a linear function of the inputs: $\text{preds} = x\,w^{T} + b$

In [6]:
def model(x):
    """Linear model: map a batch of input rows to predictions.

    Multiplies `x` by the transpose of the notebook-level weight matrix `w`
    and adds the notebook-level bias vector `b` to every resulting row.
    """
    weighted = torch.matmul(x, w.t())
    return weighted + b

In [7]:
# Generate predictions: forward pass over all training rows with the randomly
# initialised w and b. The bare `preds` on the last line displays the result.
preds = model(tens1)
preds

tensor([[ 161.5929,  -89.5891],
        [ 225.4655, -115.9053],
        [ 261.4504, -114.8664],
        [ 129.9154, -112.8674],
        [ 241.5933,  -97.6659]], grad_fn=<AddBackward0>)

In [8]:
# Compare with the targets: display the expected values so they can be read
# against the predictions shown by the previous cell.
targets

array([[ 56.,  70.],
       [ 81., 101.],
       [119., 133.],
       [ 22.,  37.],
       [103., 119.]], dtype=float32)

### Loss function

In [9]:
# Mean squared error (MSE) loss
def mse(t1, t2):
    """Return the mean of the element-wise squared differences of t1 and t2."""
    residual = t1 - t2
    squared_total = (residual * residual).sum()
    # Divide by the number of elements to turn the sum into a mean.
    return squared_total / residual.numel()


# Show the predictions followed by the target tensor for a side-by-side check.
print(preds, tens2, sep="\n")

tensor([[ 161.5929,  -89.5891],
        [ 225.4655, -115.9053],
        [ 261.4504, -114.8664],
        [ 129.9154, -112.8674],
        [ 241.5933,  -97.6659]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [10]:
# Compute the loss: mean squared error between the current predictions and
# the target tensor.
loss = mse(preds,tens2)
print(loss)

tensor(28652.4746, grad_fn=<DivBackward0>)


In [11]:
# Square root of the MSE (the root-mean-square error), which is on the same
# scale as the targets. .item() extracts the value as a plain Python float
# instead of relying on math.sqrt implicitly converting a tensor that is still
# attached to the autograd graph.
s_d = math.sqrt(loss.item())
s_d

169.2704185892355

# Compute Gradients

In [12]:
# Compute gradients: backpropagate from the loss so that .grad is filled in on
# the tensors created with requires_grad=True (w and b), then show the weights
# next to their gradient.
loss.backward()
print(w, w.grad, sep="\n")

tensor([[ 0.1548,  1.0396,  1.8693],
        [-0.9566, -0.1036, -0.3296]], requires_grad=True)
tensor([[ 10763.6270,  11364.2715,   7148.5620],
        [-16637.8359, -18047.6914, -11166.4443]])


In [13]:
# Bias vector followed by its gradient.
print(b, b.grad, sep="\n")

tensor([0.2634, 1.3578], requires_grad=True)
tensor([ 127.8035, -198.1788])


If a gradient element is **positive**:
- slightly increasing that weight will increase the loss
- slightly decreasing that weight will decrease the loss

If a gradient element is **negative**:
- slightly increasing that weight will decrease the loss
- slightly decreasing that weight will increase the loss

In [14]:
# Reset both gradients to zero in place before the next backward pass, then
# print them to confirm they were cleared.
w.grad.zero_()
b.grad.zero_()
print(w.grad, b.grad, sep="\n")

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


### Adjust weights and biases using gradient descent

We'll reduce the loss and improve our model using the gradient descent optimization algorithm, which has the following steps:

1. Generate predictions

2. Calculate the loss

3. Compute gradients w.r.t. the weights and biases

4. Adjust the weights and biases by subtracting a small quantity proportional to the gradient

5. Reset the gradients to zero

Let's implement the above step by step

In [15]:
#generate predictions
preds = model(tens1)
preds

tensor([[ 161.5929,  -89.5891],
        [ 225.4655, -115.9053],
        [ 261.4504, -114.8664],
        [ 129.9154, -112.8674],
        [ 241.5933,  -97.6659]], grad_fn=<AddBackward0>)

In [16]:
#calculate the loss
loss = mse(preds,tens2)
print(loss)

tensor(28652.4746, grad_fn=<DivBackward0>)


In [17]:
#compute gradients
loss.backward()
print(w)
print(w.grad)
print(b.grad)

tensor([[ 0.1548,  1.0396,  1.8693],
        [-0.9566, -0.1036, -0.3296]], requires_grad=True)
tensor([[ 10763.6270,  11364.2715,   7148.5620],
        [-16637.8359, -18047.6914, -11166.4443]])
tensor([ 127.8035, -198.1788])


***Let's update the weights and biases using the gradients computed above***

In [18]:
# Steps 4 and 5 - move each parameter a small step (learning rate 1e-5) against
# its gradient, then clear that gradient. torch.no_grad() keeps autograd from
# tracking the in-place parameter updates themselves.
with torch.no_grad():
    w.sub_(w.grad * 1e-5)
    w.grad.zero_()
    b.sub_(b.grad * 1e-5)
    b.grad.zero_()

In [19]:
# Parameters after one gradient-descent step.
print(w, b, sep="\n")

tensor([[ 0.0471,  0.9259,  1.7978],
        [-0.7902,  0.0769, -0.2179]], requires_grad=True)
tensor([0.2621, 1.3598], requires_grad=True)


In [20]:
# Re-run the forward pass with the updated parameters and print the new loss
# so it can be compared with the loss from before the update.
preds = model(tens1)
loss = mse(preds, tens2)
print(loss)

tensor(19334.9902, grad_fn=<DivBackward0>)


#### Train for 450 epochs, i.e. repeat the update step 450 times


In [21]:
# Repeat the full cycle 450 times:
# predict -> loss -> backward -> update parameters -> reset gradients.
for i in range(450):
    preds = model(tens1)
    loss = mse(preds, tens2)
    loss.backward()
    # Parameter updates must not be recorded by autograd.
    with torch.no_grad():
        w.sub_(w.grad * 1e-5)
        w.grad.zero_()
        b.sub_(b.grad * 1e-5)
        b.grad.zero_()

In [22]:
# calculate loss 
preds = model(tens1)
loss = mse(preds, tens2)
print(loss)

tensor(25.0394, grad_fn=<DivBackward0>)


In [23]:
# Predictions after training, to compare with the targets in the next cell.
preds

tensor([[ 56.8265,  70.9768],
        [ 86.2150,  97.6547],
        [110.1228, 138.7474],
        [ 19.2936,  39.2163],
        [109.9864, 112.8774]], grad_fn=<AddBackward0>)

In [24]:
# Target tensor, shown for comparison with the trained predictions.
tens2

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [25]:
import torch.nn as nn

In [26]:
from torch.utils.data import TensorDataset

In [27]:
# Define the dataset: TensorDataset pairs each input row with its target row.
# Slicing it returns a tuple (inputs, targets) for the selected rows; the
# first three are shown here.
train_ds = TensorDataset(tens1, tens2)
train_ds[:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]),
 tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [28]:
#Data loader spreads the data in batches
from torch.utils.data import DataLoader

In [29]:
# A batch size of 5 equals the number of training rows, so every pass over
# train_dl yields a single batch; shuffle=True re-orders the rows on each pass.
batch_size = 5
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)

In [30]:
# Peek at the first batch only: print its inputs, then its targets, and stop.
for xb, yb in train_dl:
    print(xb, yb, sep="\n")
    break

tensor([[ 87., 134.,  58.],
        [ 91.,  88.,  64.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.]])
tensor([[119., 133.],
        [ 81., 101.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.]])


In [31]:
# Define the model as a built-in linear layer with 3 inputs and 2 outputs;
# it creates and owns its own weight and bias parameters.
# NOTE: this rebinds the name `model`, replacing the hand-written model(x)
# function defined earlier in the notebook. Cells above that call model(...)
# will use this layer if they are re-run after this cell.
model = nn.Linear(in_features=3, out_features=2)
print(model.weight, model.bias, sep="\n")

Parameter containing:
tensor([[ 0.3039, -0.0392,  0.1419],
        [-0.0156,  0.3913, -0.1725]], requires_grad=True)
Parameter containing:
tensor([-0.4092,  0.0638], requires_grad=True)


In [32]:
# Parameters: model.parameters() yields the layer's weight matrix and bias
# vector; list() materialises them for display.
list(model.parameters())

[Parameter containing:
 tensor([[ 0.3039, -0.0392,  0.1419],
         [-0.0156,  0.3913, -0.1725]], requires_grad=True),
 Parameter containing:
 tensor([-0.4092,  0.0638], requires_grad=True)]

In [33]:
# Generate predictions with the untrained nn.Linear layer (calling the module
# runs its forward pass).
preds = model(tens1)
preds

tensor([[25.2514, 17.7256],
        [32.8787, 22.0398],
        [29.0099, 41.1363],
        [34.1524,  8.9182],
        [26.7315, 24.4777]], grad_fn=<AddmmBackward0>)

In [34]:
# Import nn.functional
import torch.nn.functional as F

In [35]:
# Define the loss function: use the built-in mean squared error from
# torch.nn.functional instead of the hand-written mse().
loss_fn = F.mse_loss

In [36]:
# Loss of the untrained nn.Linear model on the full training set.
loss = loss_fn(model(tens1),tens2)
loss

tensor(4445.3223, grad_fn=<MseLossBackward0>)

In [37]:
# Define the optimizer: stochastic gradient descent over the layer's
# parameters, with the same 1e-5 learning rate used in the manual loop above.
opt = torch.optim.SGD(model.parameters(),lr=1e-5)

In [38]:
# Utility function to train the model
def fit(num_epochs, model, loss_fn, opt, train_dl):
    """Train `model` for `num_epochs` passes over `train_dl`.

    Args:
        num_epochs: number of full passes over `train_dl`.
        model: callable mapping a batch of inputs to predictions.
        loss_fn: callable `(predictions, targets)` returning a scalar tensor
            that supports `.backward()` and `.item()`.
        opt: optimizer exposing `step()` and `zero_grad()` for the model's
            parameters.
        train_dl: iterable yielding `(inputs, targets)` batches.

    Every 10th epoch the loss of the last batch seen in that epoch is printed.
    Nothing is printed for an epoch in which `train_dl` yielded no batches.
    Returns None.
    """
    for epoch in range(num_epochs):
        # Stays None when train_dl is empty, so the progress line below never
        # touches an unbound `loss`.
        loss = None
        for xb, yb in train_dl:
            pred = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
            opt.step()
            # Clear the gradients so the next batch does not add to them.
            opt.zero_grad()
        if loss is not None and (epoch + 1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

In [39]:
# Train the nn.Linear model for 100 epochs; fit() prints the loss every 10th
# epoch.
fit(100,model,loss_fn,opt,train_dl)

Epoch [10/100 Loss: 617.1467
Epoch [20/100 Loss: 451.9836
Epoch [30/100 Loss: 400.4481
Epoch [40/100 Loss: 356.8004
Epoch [50/100 Loss: 318.3204
Epoch [60/100 Loss: 284.3585
Epoch [70/100 Loss: 254.3761
Epoch [80/100 Loss: 227.8995
Epoch [90/100 Loss: 204.5120
Epoch [100/100 Loss: 183.8462


## Testing 

In [41]:
# Predict (apples, oranges) for one unseen sample given as
# (temp, rainfall, humidity); the extra brackets make it a batch of one row.
model(torch.tensor([[96.2,4.0,40.1]]))

tensor([[28.8499, 18.9832]], grad_fn=<AddmmBackward0>)