In [1]:
import numpy as np
import torch
import tensorflow as tf

In [2]:
inputs = np.array([[73, 67, 43],
                   [91, 88, 64],
                   [87, 134, 58],
                   [102, 43, 37],
                   [69, 96, 70]], dtype='float32')

In [3]:
targets = np.array([[56],
                   [81],
                   [119],
                   [22],
                   [103]],dtype = "float32")
targets.shape

(5, 1)

In [4]:
inputs = torch.from_numpy(inputs)
type(inputs)
targets = torch.from_numpy(targets)
type(targets)

torch.Tensor

# Linear Regression Implementation

In [5]:
weight = torch.randn(2, 3, requires_grad=True)
bias = torch.randn(2, requires_grad=True)
print(weight)
print(bias)


tensor([[ 1.9419,  0.2796, -0.6441],
        [ 0.0491, -1.7133, -0.1375]], requires_grad=True)
tensor([-0.8677, -0.2442], requires_grad=True)


In [6]:
print(type(inputs))
type(weight.t())

<class 'torch.Tensor'>


torch.Tensor

In [7]:

torch.matmul(inputs,weight.t()) + bias

tensor([[ 131.9233, -117.3595],
        [ 159.2214, -155.3412],
        [ 168.1791, -233.5236],
        [ 185.3928,  -73.9913],
        [ 114.8718, -170.9530]], grad_fn=<AddBackward0>)

In [8]:
inputs

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])

In [9]:
def model(inputs):
    return torch.matmul(inputs,weight.t()) + bias
prediction = model(inputs)
print(prediction.shape)
print(type(prediction))
prediction


torch.Size([5, 2])
<class 'torch.Tensor'>


tensor([[ 131.9233, -117.3595],
        [ 159.2214, -155.3412],
        [ 168.1791, -233.5236],
        [ 185.3928,  -73.9913],
        [ 114.8718, -170.9530]], grad_fn=<AddBackward0>)

## Loss Finction

In [10]:
# print(prediction)
# print(targets)
dif = prediction - targets
print(dif)
print(torch.sum(dif*dif) / dif.numel())

tensor([[  75.9233, -173.3595],
        [  78.2214, -236.3412],
        [  49.1791, -352.5236],
        [ 163.3928,  -95.9913],
        [  11.8718, -273.9530]], grad_fn=<SubBackward0>)
tensor(33558.7812, grad_fn=<DivBackward0>)


In [11]:
def MSE(a,b):
    diff = a-b
    return torch.sum(diff*diff) / diff.numel()
mse = MSE(prediction,targets)
mse

tensor(33558.7812, grad_fn=<DivBackward0>)

## Compute Gradients

In [12]:
mse.backward()
print("Weight grad:\n" , weight.grad)
print("Bias grad:\n" , bias.grad)

Weight grad:
 tensor([[  6884.8716,   5345.1865,   3599.9648],
        [-18705.1426, -22015.6777, -13151.0098]])
Bias grad:
 tensor([  75.7177, -226.4337])


## Trainning using gradient descent

In [13]:
with torch.no_grad():
    weight -= weight.grad * 1e-5
    bias -= bias.grad * 1e-5
    weight.grad.zero_()
    bias.grad.zero_()

In [14]:
prediction = model(inputs)
mse = MSE(prediction, targets)
mse

tensor(23572.3340, grad_fn=<DivBackward0>)

In [15]:
for i in range(100):
    prediction = model(inputs)
    mse = MSE(prediction, targets)
    mse.backward()
    with torch.no_grad():
        weight -= weight.grad * 1e-5
        bias -= bias.grad * 1e-5
        weight.grad.zero_()
        bias.grad.zero_()

In [16]:
prediction = model(inputs)
mse = MSE(prediction, targets)
print("currr mse:\t",mse)
print(prediction)
print(targets)

currr mse:	 tensor(905.4383, grad_fn=<DivBackward0>)
tensor([[ 67.4012,  65.6756],
        [ 80.0241,  89.3095],
        [107.1987,  88.8570],
        [ 80.5104,  70.9118],
        [ 63.2697,  85.1508]], grad_fn=<AddBackward0>)
tensor([[ 56.],
        [ 81.],
        [119.],
        [ 22.],
        [103.]])
