In [53]:
import numpy as np
import torch
import math

In [29]:
# Inputs: one row per sample, columns are (temperature, rainfall, humidity)
inputs = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
    ],
    dtype=np.float32,
)
print(inputs.size)  # total element count (rows * columns)
# from_numpy wraps the array as a tensor without copying the data
inpt = torch.from_numpy(inputs)
inpt

15


tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

In [4]:
# Targets: one row per sample, columns are (apples, oranges)
targets = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
    ],
    dtype=np.float32,
)

In [11]:
# Randomly initialised parameters of the linear model (standard-normal draws).
# NOTE(review): no RNG seed is set in the visible cells, so these values -- and
# every number derived from them -- will differ from run to run.
w = torch.randn(2, 3, requires_grad=True)  # weights: 2 outputs x 3 input features
b = torch.randn(2, requires_grad=True)  # biases: one per output
print(w, b, sep="\n")

tensor([[-0.3757, -1.3237,  0.4534],
        [ 0.1642, -0.7022,  0.2615]], requires_grad=True)
tensor([2.4079, 0.5795], requires_grad=True)


In [12]:
 def model(x):
     """Apply the linear model ``x @ w.T + b``.

     Reads the notebook-level parameters ``w`` (weights) and ``b`` (biases)
     rather than taking them as arguments; ``b`` is added to every row of
     the matrix product.
     """
     return torch.matmul(x, w.t()) + b

In [23]:
# Generate predictions: run the linear model over every input row
predict = model(inpt)
print(predict)

tensor([[ -94.2127,  -23.2412],
        [-119.2523,  -29.5418],
        [-181.3609,  -64.0707],
        [ -76.0601,   -3.1948],
        [-118.8556,  -37.2030]], grad_fn=<AddBackward0>)


In [22]:
# Compare with targets: show the NumPy array, then the tensor built from it
print(targets)
# `target` (singular) is the tensor version of the NumPy array `targets`
target = torch.from_numpy(targets)
print(target)

[[ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]]
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [35]:
# Evaluate how well the model works by measuring the difference between predict and target

diff = ( predict - target)  # element-wise subtraction
print(diff) 
torch.sum(diff * diff)/ diff.numel()  # element-wise product, not matrix multiplication
# The expression above is the Mean Squared Error (MSE) loss:
# First  - compute the difference between the two matrices (predict and target)
# Second - square each element so that negative differences do not cancel positive ones
# Third  - average all elements of the resulting matrix

tensor([[-150.2127,  -93.2412],
        [-200.2523, -130.5418],
        [-300.3609, -197.0707],
        [ -98.0601,  -40.1948],
        [-221.8556, -156.2030]], grad_fn=<SubBackward0>)


tensor(30230.4160, grad_fn=<DivBackward0>)

In [37]:
# Wrap the computation above in a reusable loss function.
# The loss is a function of the weights: changing the weights changes the
# predictions and therefore the loss.
def mse(t1, t2):
    """Return the mean squared error between two tensors.

    The element-wise difference ``t1 - t2`` is squared, summed, and divided
    by the number of elements in that difference.
    """
    delta = t1 - t2
    squared = delta * delta
    total = squared.sum()
    return total / delta.numel()

# MSE between the current predictions and the targets; the result is a
# scalar tensor that autograd can differentiate
loss = mse(predict, target)
print(loss)

tensor(30230.4160, grad_fn=<DivBackward0>)


In [43]:
# Root of the MSE (RMSE) puts the error back on the same scale as the targets.
# Read the value from `loss` instead of pasting a number copied from an earlier
# output, so this cell stays correct whenever the weights (and the loss) change.
x = loss.item()
math.sqrt(x)
# For the run shown, the prediction errors lie roughly in the 50-200 range,
# which makes the untrained model a poor fit.

173.86896215253603

In [44]:
# To improve the model we will use gradient descent
loss.backward()

# backward() computes the partial derivatives of the loss w.r.t. the tensors
# created with requires_grad=True and stores them in each tensor's .grad
print(w)
print(w.grad)

tensor([[-0.3757, -1.3237,  0.4534],
        [ 0.1642, -0.7022,  0.2615]], requires_grad=True)
tensor([[-16126.0107, -18689.9102, -11170.8691],
        [-10141.7871, -12173.2363,  -7243.1133]])


#### The gradient is the rate of change of the loss, i.e. the slope of the loss function w.r.t. the weights and biases
If a gradient element is positive:
 - increasing that element's value slightly will increase the loss, and decreasing it will decrease the loss.
If a gradient element is negative:
 - increasing that element's value slightly will decrease the loss, and decreasing it will increase the loss.
This forms the basis of the optimization algorithm (gradient descent) used to improve the model.

In [46]:
# Reset the gradients with .zero_(): new gradients are added to whatever is
# already stored in .grad, so stale values would otherwise distort later results
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [48]:
# Zeroing .grad leaves w and b themselves unchanged; only the stored gradients are reset
#w, b

Optimization algorithm: adjusting the weights and biases using gradient descent


In [73]:
# Generate predictions
# NOTE(review): the execution count jumps from 48 to 73 here and the recorded
# output is already close to the targets, so it presumably reflects weights
# updated in runs that are not shown; a fresh Restart & Run All will print
# different values -- confirm before relying on the numbers below.
predict = model(inpt)
print(predict)

tensor([[ 59.5357,  72.7002],
        [ 86.8308, 101.4836],
        [104.3734, 127.2662],
        [ 32.8194,  50.2676],
        [103.5903, 112.9339]], grad_fn=<AddBackward0>)


In [74]:
# Calculate the MSE loss of the current predictions against the targets
loss = mse(predict, target)
print(loss)

tensor(63.1073, grad_fn=<DivBackward0>)


In [75]:
# Compute the gradients of the loss w.r.t. the weights and biases;
# backward() stores them in w.grad and b.grad
loss.backward()
print(w.grad)
print(b.grad)

tensor([[ 132.1001, -137.6123,   23.7000],
        [ 135.4027, -111.3405,  -23.8459]])
tensor([2.4598, 1.8606])


In [76]:
# One gradient-descent step: move each parameter a small amount (proportional
# to its gradient) in the direction that lowers the loss, then clear the gradient.
# no_grad() keeps these in-place updates out of the autograd graph.
with torch.no_grad():
    # weights
    w -= 1e-5 * w.grad
    w.grad.zero_()
    # biases
    b -= 1e-5 * b.grad
    b.grad.zero_()

In [77]:
# Inspect the parameters after the update step
print(w)
print(b)

tensor([[-0.2951,  0.4952,  1.0575],
        [-0.0719,  0.6566,  0.7758]], requires_grad=True)
tensor([2.4120, 0.5807], requires_grad=True)


In [78]:
# Recalculate the loss with the updated weights and biases
predict = model(inpt)
loss = mse(predict, target)
print(loss)
# Compare with the previous loss: it should be lower after the update step

tensor(62.4269, grad_fn=<DivBackward0>)


In [83]:
# Train for multiple epochs: repeat the predict -> loss -> backward -> update cycle.
num_epochs = 150  # number of gradient-descent iterations
learning_rate = 1e-5  # step size applied to each gradient
for epoch in range(num_epochs):
    predict = model(inpt)  # forward pass
    loss = mse(predict, target)  # scalar loss for the current parameters
    loss.backward()  # adds d(loss)/dw and d(loss)/db into .grad
    # Parameter updates must not be tracked by autograd.
    with torch.no_grad():
        w -= w.grad * learning_rate
        b -= b.grad * learning_rate
        # Reset, otherwise the next backward() would add to these gradients.
        w.grad.zero_()
        b.grad.zero_()

In [84]:
# Calculate the loss after training
predict = model(inpt)
loss = mse(predict, target)
print(loss)

tensor(7.5053, grad_fn=<DivBackward0>)


In [85]:
# Predictions produced by the trained parameters (compare with the targets)
predict

tensor([[ 57.6368,  70.8044],
        [ 84.5405, 100.2384],
        [112.6350, 133.1458],
        [ 21.6831,  39.2540],
        [106.1042, 117.1945]], grad_fn=<AddBackward0>)

In [82]:
# Targets
target
# Predictions can be brought closer to the targets by training for more epochs

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])