In [26]:
#the training data can be represented using two matrices, inputs and targets, each with one row per observation, 
#and one collumn per variable

In [27]:
pip install torch

Note: you may need to restart the kernel to use updated packages.


In [28]:
import numpy as np
import torch


In [29]:
#input(temp,rainfall,humidity representing the respectively as follows)
inputs = np.array([[73,67,43],
                   [91,88,64],
                   [87,134,58],
                   [102,43,37],
                   [69,96,70]], dtype="float32")

In [30]:
#targets(apples,oranges)
targets = np.array([[56,70],
                    [81,101],
                    
                    [119,133],
                    [22,37],
                    [103,119]], dtype="float32")

In [31]:
#you then convert inputs and targets from numpy to tensors
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)



tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [32]:
#using the hypothesis func, we estimate the crop yield. the function is as follows
#yield_apple = w11 * temp + w12 * rainfall + w13 * humidity + b1
#yield_orangr = w21 * temp + w22 * rainfall + w23 * humidity + b2
#where b1 and b2 arecontrols error in the model
#the above function for both crops can be represented with a matrix of two rows and three columns,
#with b1 and b2 being vectors known as the bias.The input values form a matrix and the weghts also form a matrix.
#since we do not know the values of the bias and features(wights), 
#we figure out thei values by tarting with random valiues as follow

In [33]:
w = torch.randn(2,3, requires_grad=True)
b  = torch.randn(2, requires_grad=True)
print(w)
print(b)

tensor([[-0.1554, -0.9765, -0.7454],
        [-1.2709,  1.4003,  1.6327]], requires_grad=True)
tensor([ 0.3514, -1.7408], requires_grad=True)


In [34]:
#our model in this case is simply a function that perfoms matrix multiplication between
#the inputs and the weghts w (transposed), and then it adds the bias b
#we transpose the weights using the .t() method

In [35]:
#we then define the model as follows
def model(x):
    return x @ w.t() + b

    

In [36]:
preds = model(inputs)
print(preds)


tensor([[-108.4757,   69.5067],
        [-147.4344,  110.3224],
        [-187.2601,  170.0215],
        [ -85.0742,  -10.7511],
        [-156.2996,  159.2799]], grad_fn=<AddBackward0>)


In [37]:
print(targets)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [38]:
#we can see that there is a huge difference between predicted values and actual values.
#this is because we just initialized random values as weight and bias
#we therfore are going to  use the loss function approach to improve this model by following the procedure belo
#-calculate the diference between the predicted and target matrixes
#-square all elements of the difference matrix to remove negative values
#-calculate  the average of the elements in the resulting matrix 

In [39]:
diff = preds - targets
diff_sqr = diff*diff
torch.sum(diff_sqr)/diff.numel()

tensor(25709.1289, grad_fn=<DivBackward0>)

In [40]:
def mse(t1,t2):
    diff = t1-t2
    return torch.sum(diff*diff)/diff.numel()

In [41]:
#the loss tells us how much data the model is losing, the lower the loss, the better the model and vice versa
loss = mse(preds,targets)
print(loss)

tensor(25709.1289, grad_fn=<DivBackward0>)


In [42]:
#we now we compute the gradient of the loss 
#the loss is a quadratic function of our wghts and biases, and our objective is to find the set of weghts where the 
#loss is the lowest on the loss weight graph. A key insight from calculus is that the gradient of the graph of  the loss function indicates
#the rate of change of the loss, or the slope of the loss  with respect to our weight and biases
#A possitive gradient means that a slight increase in the weight or bias, depending on the graph parameters, will 
#increase the loss and vice versa.
#This is the scenario behind the gradient decent optimization algorithm for model improvement

In [43]:
loss.backward()

In [44]:
print(w)
print(w.grad)


tensor([[-0.1554, -0.9765, -0.7454],
        [-1.2709,  1.4003,  1.6327]], requires_grad=True)
tensor([[-17650.4258, -20331.5781, -12313.6113],
        [   388.3812,   1512.3563,    755.0951]])


In [45]:
print(b)
print(b.grad)

tensor([ 0.3514, -1.7408], requires_grad=True)
tensor([-213.1088,    7.6759])


In [46]:
#before we proceed we reset the gradients to zero by calling .zero_() method on the weights and biases
#this is because pytorch accumulates gradients.
#that is, the next time we call .backward() method on the loss, the new gradient value will be added to the existing one,
#which may lead to unexpected results

In [47]:
#we will now predict the values,although we started with random numbers,
#we will improve them to fit our model using gradient decent
#we will adjust the weigt and biases using the gradient descent algorithm following the procedure below
#-generate predictions
#-calculate the loss
#-compute gradients
#-adjust the weight by subtracting a small quantity proportional to the gradient
#-reset the gradient to zero

In [48]:
#-we continue with stage 4 since we are done with first three stages


In [49]:
#we use torch.no-grad to indicate that pytorch should not track,calculate
#or modify gradients with updating the weghts and biases. we then multiply the weight and biase with a very small number since
#we do not want to ensure we do not modify the weight by a multiplyying with a large number.we then set weight back to zero
with torch.no_grad():
    w -= w.grad*1e-5
    b -= b.grad*1e-5
    w.grad.zero_()
    b.grad.zero_()

In [50]:
#we can see the the values of w and b have increased respectively
#therefore we have slightly changed our random values
print(w)
print(b)

tensor([[ 0.0211, -0.7732, -0.6223],
        [-1.2748,  1.3851,  1.6251]], requires_grad=True)
tensor([ 0.3536, -1.7409], requires_grad=True)


In [51]:
#we now calculate the loss again and compare to the previous loss.with the new w and b, the model should have lower loss
preds = model(inputs)
loss = mse(preds,targets)
print(loss)




tensor(17699.6152, grad_fn=<DivBackward0>)


In [None]:
#to reduce the loss function further, we can repeat the process of adjusting the ewights and biases using the gradient method
#each iteration is called epoch,lets train the model for 100 epochs.


In [53]:
for i in range(100):
    preds = model(inputs)
    loss = mse(preds,targets)
    loss.backward()
    with torch.no_grad():
        w -= w.grad * 1e-5
        b -= b.grad * 1e-5
        w.grad.zero_()
        b.grad.zero_()
    

In [54]:
#we now calculate the loss again and compare with the previous
preds = model(inputs)
loss = mse(preds,targets)
print(loss)

tensor(354.2454, grad_fn=<DivBackward0>)


In [56]:
print(targets)
print(preds)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])
tensor([[ 63.9169,  64.6069],
        [ 82.5962, 101.0249],
        [106.9542, 141.3132],
        [ 59.5536,   5.9885],
        [ 80.2178, 137.5181]], grad_fn=<AddBackward0>)


In [None]:
#from the new obtained values, we can see that the predictios are quite closer than before
#we can still further adjust the values for more accuracy

In [1]:
import jovian

<IPython.core.display.Javascript object>

In [2]:
jovian.commit(filenam='linear Regression From Scratch')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[31m[jovian] Error: Failed to read the Jupyter notebook. Please re-run this cell to try again. If the issue persists, provide the "filename" argument to "jovian.commit" e.g. "jovian.commit(filename='my-notebook.ipynb')"[0m
