In [2]:
from __future__ import print_function
import torch

In [3]:
x = torch.empty(5,3)
print(x)
x.shape

tensor([[1.3100e-08, 4.5584e-41, 1.3100e-08],
        [4.5584e-41, 5.8337e-10, 1.3567e-19],
        [1.4584e-19, 7.8447e+17, 1.3556e-19],
        [1.3563e-19, 1.3563e-19, 1.8561e-19],
        [1.4584e-19, 2.4756e-12, 4.1411e-11]])


torch.Size([5, 3])

In [4]:
import numpy as np
import torch

In [5]:
inputs = np.array([[73,67,43],
                  [91,88,64],
                  [87,134,58],
                  [102,43,37],
                  [69,96,70]],dtype='float32')
print(inputs)
outputs = np.array([[56,70],
                   [81,101],
                   [119,133],
                   [22,37],
                   [103,119]], dtype='float32')
print(outputs)

[[ 73.  67.  43.]
 [ 91.  88.  64.]
 [ 87. 134.  58.]
 [102.  43.  37.]
 [ 69.  96.  70.]]
[[ 56.  70.]
 [ 81. 101.]
 [119. 133.]
 [ 22.  37.]
 [103. 119.]]


In [6]:
import torch
inputs = torch.from_numpy(inputs)
outputs = torch.from_numpy(outputs)
print(inputs)
print(outputs)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [7]:
w = torch.randn(2,3,requires_grad=True)
b = torch.randn(2,requires_grad=True)
print(w)
print(b)

tensor([[-1.7557,  0.3168, -0.9198],
        [ 0.9427, -1.4483,  2.0614]], requires_grad=True)
tensor([2.3706, 1.3147], requires_grad=True)


In [8]:
def model(x):
    return x@w.t()+b

In [9]:
pred = model(inputs)
print(pred)

tensor([[-144.1240,   61.7429],
        [-188.3903,   91.5886],
        [-161.2746,    8.8292],
        [-197.1255,  111.4723],
        [-152.7481,   71.6305]], grad_fn=<AddBackward0>)


In [10]:
print(outputs)

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [13]:
print(pred)

tensor([[-144.1240,   61.7429],
        [-188.3903,   91.5886],
        [-161.2746,    8.8292],
        [-197.1255,  111.4723],
        [-152.7481,   71.6305]], grad_fn=<AddBackward0>)


In [15]:
def mse(x,y):
    diff = x-y
    return torch.sum(diff*diff)/diff.numel()

In [16]:
loss = mse(pred, outputs)
print(loss)

tensor(32796.2812, grad_fn=<DivBackward0>)


In [19]:
loss.backward()

In [20]:
print(w.grad)

tensor([[-20700.9727, -21729.1328, -13622.4482],
        [ -1586.8774,  -3873.0967,  -1743.9363]])


In [21]:
print(w)

tensor([[-1.7557,  0.3168, -0.9198],
        [ 0.9427, -1.4483,  2.0614]], requires_grad=True)


In [22]:
w.grad.zero_()
b.grad.zero_()
print(w.grad)
print(b.grad)

tensor([[0., 0., 0.],
        [0., 0., 0.]])
tensor([0., 0.])


In [23]:
preds = model(inputs)
print(preds)

tensor([[-144.1240,   61.7429],
        [-188.3903,   91.5886],
        [-161.2746,    8.8292],
        [-197.1255,  111.4723],
        [-152.7481,   71.6305]], grad_fn=<AddBackward0>)


In [24]:
loss = mse(preds, outputs)
print(loss)

tensor(32796.2812, grad_fn=<DivBackward0>)


In [25]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[-20700.9727, -21729.1328, -13622.4482],
        [ -1586.8774,  -3873.0967,  -1743.9363]])
tensor([-244.9325,  -22.9473])


In [26]:
with torch.no_grad():
    w -= w.grad*1e-5
    b -= b.grad*1e-5
    w.grad.zero_()
    b.grad.zero_()
    

In [27]:
print(w)
print(b)

tensor([[-1.5487,  0.5341, -0.7836],
        [ 0.9586, -1.4095,  2.0789]], requires_grad=True)
tensor([2.3731, 1.3150], requires_grad=True)


In [29]:
preds = model(inputs)
loss = mse(preds, outputs)
print(loss)

tensor(22716.6875, grad_fn=<DivBackward0>)


we can notice that the loss has already been reduced.

In [30]:
for i in range(100):
    preds = model(inputs)
    loss = mse(preds, outputs)
    loss.backward()
    with torch.no_grad():
        w -=w.grad*1e-5
        b -=b.grad*1e-5
        w.grad.zero_()
        b.grad.zero_()

In [None]:
This loop of training has a problem that the loss backward has to be executed at least once before.

In [31]:
preds = model(inputs)
loss = mse(preds, outputs)
print(loss)

tensor(631.4207, grad_fn=<DivBackward0>)


We can notice that the loss drop from 30000 to 631, which means that the training is efficient.

# Comparison between prediction and target value after training 100 epochs

In [33]:
print(preds)
print(outputs)

tensor([[ 56.0518,  78.9829],
        [ 74.2470, 117.8037],
        [138.5938,  80.0511],
        [ 11.5655,  87.2906],
        [ 94.0963, 119.8642]], grad_fn=<AddBackward0>)
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


# Use pytorch internal function

In [34]:
import torch.nn as nn

In [35]:
inputs = np.array([[73,67,43],[91,88,64],[87,134,58],
                  [102,43,37],[69,96,70],[73,67,43],
                  [91,88,64],[87,134,58],[102,43,37],
                  [69,96,70],[73,67,43],[91,88,64],
                  [87,134,58],[102,43,37],[69,96,70]],dtype='float32')
targets = np.array([[56,70],[81,101],[119,133],[22,37],[103,119],[56,70],[81,101],[119,133],[22,37],[103,119],[56,70],[81,101],[119,133],[22,37],[103,119]],dtype = 'float32')
inputs = torch.from_numpy(inputs)
targets = torch.from_numpy(targets)
print(inputs)
print(targets)

tensor([[ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.],
        [ 91.,  88.,  64.],
        [ 87., 134.,  58.],
        [102.,  43.,  37.],
        [ 69.,  96.,  70.]])
tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])


In [36]:
from torch.utils.data import TensorDataset

In [38]:
train_dataset = TensorDataset(inputs, targets)
train_dataset[0:3]#visit raw

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [39]:
from torch.utils.data import DataLoader

In [41]:
batch_size = 5
train_dl = DataLoader(train_dataset, batch_size, shuffle = True)

In [42]:
for x,y in train_dl:
    print(x)
    print(y)
    break

tensor([[102.,  43.,  37.],
        [ 69.,  96.,  70.],
        [ 73.,  67.,  43.],
        [ 73.,  67.,  43.],
        [ 69.,  96.,  70.]])
tensor([[ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 56.,  70.],
        [103., 119.]])


# Linear Model Initializaiton through nn.Linear

In the previous case, we initialize the weight matrix and bias matrix manually.

In this case, we use nn.Linear for initialization

In [44]:
model = nn.Linear(3,2)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.3929, -0.0334, -0.3824],
        [-0.0422, -0.4724, -0.1078]], requires_grad=True)
Parameter containing:
tensor([-0.0312,  0.2232], requires_grad=True)


In [46]:
list(model.parameters())

[Parameter containing:
 tensor([[-0.3929, -0.0334, -0.3824],
         [-0.0422, -0.4724, -0.1078]], requires_grad=True),
 Parameter containing:
 tensor([-0.0312,  0.2232], requires_grad=True)]

In [47]:
preds = model(inputs)
preds

tensor([[-47.3954, -39.1428],
        [-63.1998, -52.0863],
        [-60.8709, -72.9989],
        [-55.6935, -28.3840],
        [-57.1175, -55.5831],
        [-47.3954, -39.1428],
        [-63.1998, -52.0863],
        [-60.8709, -72.9989],
        [-55.6935, -28.3840],
        [-57.1175, -55.5831],
        [-47.3954, -39.1428],
        [-63.1998, -52.0863],
        [-60.8709, -72.9989],
        [-55.6935, -28.3840],
        [-57.1175, -55.5831]], grad_fn=<AddmmBackward>)

In [49]:
#mse_loss
import torch.nn.functional as F

In [50]:
loss_fn = F.mse_loss

In [51]:
loss = loss_fn(model(inputs), targets)
loss

tensor(20804.9043, grad_fn=<MseLossBackward>)

In [52]:
#define optimizer
opt = torch.optim.SGD(model.parameters(), lr = 1e-5)

In [53]:
#utility functions to train the model
def train(num_epochs, model, loss_fn, opt):
    for epoch in range(num_epochs):
        for xb, yb in train_dl:
            preds = model(xb)
            loss = loss_fn(preds, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        if(epoch+1)%10==0:
            print('Epoch[{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item()))

In [56]:
train(100, model, loss_fn, opt)

Epoch[10/100], Loss: 32.1938
Epoch[20/100], Loss: 34.7805
Epoch[30/100], Loss: 32.3428
Epoch[40/100], Loss: 20.8161
Epoch[50/100], Loss: 36.4683
Epoch[60/100], Loss: 24.4008
Epoch[70/100], Loss: 13.9118
Epoch[80/100], Loss: 11.8027
Epoch[90/100], Loss: 10.0273
Epoch[100/100], Loss: 5.3903


In [57]:
preds = model(inputs)
preds

tensor([[ 57.4521,  70.6879],
        [ 79.5966,  98.6405],
        [123.4059, 136.4301],
        [ 23.0977,  39.1925],
        [ 96.2527, 114.4198],
        [ 57.4521,  70.6879],
        [ 79.5966,  98.6405],
        [123.4059, 136.4301],
        [ 23.0977,  39.1925],
        [ 96.2527, 114.4198],
        [ 57.4521,  70.6879],
        [ 79.5966,  98.6405],
        [123.4059, 136.4301],
        [ 23.0977,  39.1925],
        [ 96.2527, 114.4198]], grad_fn=<AddmmBackward>)

In [58]:
targets#compare with targets after training

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.]])

In [59]:
import jovian
jovian.commit()

ModuleNotFoundError: No module named 'jovian'