In [1]:
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

# Show the installed PyTorch version so the outputs below can be tied to it.
print(torch.__version__)

1.3.0


In [2]:
# Problem dimensions: number of input features and number of samples.
num_inputs = 2
num_examples = 1000

# Ground-truth weights and bias of the linear model used to synthesise labels.
true_w = [2, -3.4]
true_b = 4.2

# Draw standard-normal features, apply the true linear map, then perturb the
# labels in place with Gaussian noise (mean 0, standard deviation 0.01).
features = torch.randn(num_examples, num_inputs, dtype=torch.float32)
labels = features[:, 0] * true_w[0] + features[:, 1] * true_w[1] + true_b
noise = np.random.normal(0, 0.01, size=labels.size())
labels += torch.tensor(noise, dtype=torch.float32)

In [3]:
# Plot the second feature column against the labels with marker size 1;
# the trailing semicolon suppresses the returned artist's repr.
plt.scatter(features[:, 1].numpy(), labels.numpy(), s=1);

In [4]:
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    The example indices are shuffled once per call with ``random.shuffle``
    and then consumed in consecutive chunks, so every example is yielded
    exactly once per pass.

    Args:
        batch_size: Positive number of examples per batch. The final batch
            is shorter when ``len(features)`` is not a multiple of it.
        features: Tensor whose first dimension indexes examples.
        labels: Tensor whose first dimension indexes the same examples.
            Assumed to live on the same device as ``features``.

    Yields:
        Tuples ``(X, y)`` holding the rows of ``features`` and ``labels``
        selected for the current batch.

    Raises:
        ValueError: If ``batch_size`` is not positive. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if batch_size <= 0:
        raise ValueError(
            'batch_size must be a positive integer, got %r' % (batch_size,))

    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # visit the examples in a fresh random order
    for start in range(0, num_examples, batch_size):
        # List slicing clamps at the end, so the last batch may be short.
        # Build the index on the features' device: index_select requires
        # the index tensor and the indexed tensor to be co-located.
        batch = torch.tensor(indices[start:start + batch_size],
                             dtype=torch.long, device=features.device)
        yield features.index_select(0, batch), labels.index_select(0, batch)

In [15]:
batch_size = 10

# Pull just the first mini-batch from a freshly shuffled pass and show it;
# nothing is printed if the iterator happens to be empty.
first_batch = next(data_iter(batch_size, features, labels), None)
if first_batch is not None:
    X, y = first_batch
    print(X, '\n', y)

tensor([[-3.1768, -0.1232],
        [-0.6662, -0.0613],
        [ 0.1475, -0.6895],
        [ 0.5243, -0.5755],
        [-0.9027,  0.5149],
        [-0.4269,  0.4762],
        [-0.4361, -1.2426],
        [ 0.0554, -0.4465],
        [-0.3154,  0.5607],
        [ 1.2315,  0.1099]]) 
 tensor([-1.7161,  3.0805,  6.8387,  7.2061,  0.6532,  1.7397,  7.5588,  5.8264,
         1.6551,  6.2811])


In [6]:
# Initialise the weights from N(0, 0.01^2) as a (num_inputs, 1) column and
# the bias as a single zero.
init_w = np.random.normal(0, 0.01, (num_inputs, 1))
w = torch.tensor(init_w, dtype=torch.float32)
b = torch.zeros(1, dtype=torch.float32)

# Turn on gradient tracking in place; requires_grad_() defaults to True.
w.requires_grad_()
b.requires_grad_()

tensor([0.], requires_grad=True)

In [7]:
def linreg(X, w, b):
    """Linear model: matrix-multiply ``X`` by ``w`` and add the bias ``b``.

    ``X`` and ``w`` must both be 2-D matrices (a ``torch.mm`` requirement);
    ``b`` is added to the product by broadcasting.
    """
    product = X.mm(w)
    return product.add(b)

def squared_loss(y_hat, y):
    """Element-wise half squared error between predictions and targets.

    ``y`` is viewed with the shape of ``y_hat`` before subtracting, so a flat
    label vector lines up with column-shaped predictions. The result keeps
    the shape of ``y_hat``; no reduction is applied.
    """
    residual = y_hat - y.view(y_hat.size())
    return residual ** 2 / 2
    
def sgd(params, lr, batch_size):
    """Apply one mini-batch SGD step to ``params`` in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.
    The division by ``batch_size`` averages a gradient that was accumulated
    from a summed (not averaged) batch loss.

    Args:
        params: Iterable of tensors to update. Parameters whose ``grad`` is
            ``None`` (no backward pass has reached them) are left untouched.
        lr: Learning rate.
        batch_size: Number of examples the accumulated gradient covers.
    """
    # Update under no_grad rather than through ``.data``: the step is still
    # kept out of the autograd graph, but autograd continues to see the
    # in-place modification instead of having it hidden from it.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param.sub_(lr * param.grad / batch_size)

In [8]:
lr = 0.03
num_epochs = 5

net = linreg
loss = squared_loss

# Training: every epoch makes one full shuffled pass over the dataset.
for epoch in range(num_epochs):
    # X holds the features and y the labels of one mini-batch.
    for X, y in data_iter(batch_size, features, labels):
        # Sum the per-example losses; sgd() divides by batch_size afterwards.
        l = loss(net(X, w, b), y).sum()
        # Back-propagate to fill w.grad and b.grad.
        l.backward()
        # Take one mini-batch stochastic gradient descent step.
        sgd([w, b], lr, batch_size)
        # Gradients accumulate across backward() calls, so clear them.
        w.grad.zero_()
        b.grad.zero_()
    # Evaluation only: skip building an autograd graph over the full dataset.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

epoch 1, loss 0.030283
epoch 2, loss 0.000102
epoch 3, loss 0.000047
epoch 4, loss 0.000047
epoch 5, loss 0.000047


In [16]:
# Walk one full shuffled pass in batches of 10, overwriting the test pair
# each time, so that x_test / y_test end up holding the final batch.
for x, y in data_iter(10, features, labels):
    x_test, y_test = x, y

In [31]:
# view(-1) reinterprets the batch matrix as a flat 1-D tensor.
x_view = x_test.view(-1)
# Print the original matrix followed by its flattened view.
print(x_test, x_view, sep='\n')

tensor([[-0.8610,  0.4798],
        [-1.1526,  0.6640],
        [ 0.4182,  1.9204],
        [ 0.8108, -2.0770],
        [-1.5355, -1.4060],
        [-1.4790, -1.0699],
        [ 0.9932,  1.0587],
        [-1.3455, -0.6910],
        [-0.3478, -0.8518],
        [-0.0420, -1.4659]])
tensor([-0.8610,  0.4798, -1.1526,  0.6640,  0.4182,  1.9204,  0.8108, -2.0770,
        -1.5355, -1.4060, -1.4790, -1.0699,  0.9932,  1.0587, -1.3455, -0.6910,
        -0.3478, -0.8518, -0.0420, -1.4659])


In [43]:
# Reinterpret the flat tensor as a single column; -1 lets torch infer the
# number of rows from the element count.
x_view.view(-1, 1).size()

torch.Size([20, 1])