In [88]:
%matplotlib inline
import random
import torch
from d2l import torch as d2l

In [89]:
def synthetic_data(w, b, num_examples):
    """Generate a noisy linear dataset following y = Xw + b + noise.

    Args:
        w: weight tensor; ``len(w)`` sets the number of feature columns.
        b: bias added to every label.
        num_examples: number of rows to generate.

    Returns:
        A pair ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` with
        entries drawn from N(0, 1); ``y`` is reshaped to a single column and
        carries additive Gaussian noise with standard deviation 0.01.
    """
    inputs = torch.normal(0, 1, (num_examples, len(w)))
    noiseless = inputs @ w + b
    # The noise is drawn after the features so the RNG is consumed in a fixed order.
    noise = torch.normal(0, 0.01, noiseless.shape)
    targets = noiseless + noise
    return inputs, targets.reshape((-1, 1))

# Ground-truth parameters of the linear model used to synthesize the dataset.
true_w = torch.tensor([2, -3.4], dtype=torch.float32)
true_b = 4.2
# Draw 1000 (feature, label) examples from that ground-truth model.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

In [90]:
# Show the first generated example: its feature row and the matching label.
print('features:', features[0],'\nlabel:', labels[0])

features: tensor([0.7381, 0.4171]) 
label: tensor([4.2498])


In [91]:
# Call the d2l helper that (by its name) sets the figure size for later plots.
d2l.set_figsize()
# Disabled: scatter plot of the second feature column against the labels.
#d2l.plt.scatter(features[:, 1].detach().numpy(), labels.detach().numpy(), 1);
# NOTE(review): `d2l.plot` below looks like a typo for `d2l.plt` — confirm before re-enabling.
#d2l.plot.show()

In [92]:
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    The example order is reshuffled on every call using the global ``random``
    module state. Batches are yielded in chunks of ``batch_size``; the last
    batch is shorter when the number of examples is not a multiple of
    ``batch_size``.

    Args:
        batch_size: number of examples per mini-batch.
        features: indexable tensor whose first dimension enumerates examples.
        labels: tensor indexed with the same example indices as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for each batch of shuffled indices.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing clamps at the end of the list, so the final batch is simply shorter.
        chosen = torch.tensor(order[start:start + batch_size])
        yield features[chosen], labels[chosen]


In [93]:
# Mini-batch size; also used by the training loop further down.
batch_size = 10
# Peek at the first shuffled mini-batch only, then stop iterating.
for x, y in data_iter(batch_size, features, labels):
    print(x, '\n', y)
    break

tensor([[ 0.5601,  0.7714],
        [-0.9142, -0.4957],
        [-0.4034,  1.0671],
        [-0.6418, -0.1422],
        [ 0.6082, -0.8537],
        [-1.4738,  1.9927],
        [ 1.7154, -0.5196],
        [-0.7506,  0.3712],
        [-1.4413, -1.2807],
        [-0.0293, -0.7154]]) 
 tensor([[ 2.7029],
        [ 4.0580],
        [-0.2430],
        [ 3.3995],
        [ 8.3094],
        [-5.5458],
        [ 9.4178],
        [ 1.4497],
        [ 5.6748],
        [ 6.5942]])


In [94]:
# Demo: indexing a tensor with a list of positions selects those elements,
# which is the mechanism data_iter relies on to gather a mini-batch.
num = torch.arange(1, 7)  # tensor([1, 2, 3, 4, 5, 6])
index = [3, 5]
num[index]

tensor([4, 6])

In [95]:
# Trainable parameters: w is a (2, 1) weight drawn from a normal distribution
# with mean 0 and std 0.01; b is a single zero bias. Both track gradients.
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Display both parameters as the cell output.
[w, b]

[tensor([[ 0.0045],
         [-0.0063]], requires_grad=True),
 tensor([0.], requires_grad=True)]

In [96]:
def linereg(x, w, b):
    """Linear regression model: return the prediction ``x @ w + b``.

    Args:
        x: input features.
        w: weights, matrix-multiplied with ``x``.
        b: bias, broadcast-added to the product.
    """
    prediction = x @ w
    return prediction + b

In [97]:
# Loss function
def squared_loss(y_hat, y):
    """Element-wise squared loss ``(y_hat - y)^2 / 2``.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so a flat
    label vector and a column of predictions are compared entry by entry.
    The result is not reduced: it has the same shape as ``y_hat``.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

In [98]:
# Optimization algorithm
def sgd(params, lr, batch_size):
    """Apply one in-place step of mini-batch stochastic gradient descent.

    Each parameter is updated as ``param -= lr * param.grad / batch_size`` and
    its gradient is then reset to zero so the next backward pass starts clean.

    Args:
        params: iterable of tensors to update in place.
        lr: learning rate (step size).
        batch_size: divisor applied to the accumulated gradient, i.e. the
            number of examples whose losses were summed before ``backward()``.

    Parameters whose ``.grad`` is ``None`` (they did not take part in the
    backward pass) are left untouched instead of raising.
    """
    with torch.no_grad():  # the update itself must not be recorded by autograd
        for param in params:
            if param.grad is None:
                # Nothing was back-propagated into this parameter; skip it.
                continue
            param -= lr * param.grad / batch_size
            param.grad.zero_()  # gradients accumulate, so clear them manually
            

In [99]:
# Training loop
lr = 0.03        # learning rate (step size)
num_epochs = 3   # number of full passes over the dataset
net = linereg
loss = squared_loss

for epoch in range(num_epochs):
    for x, y in data_iter(batch_size, features, labels):
        l = loss(net(x, w, b), y)
        # l holds one loss value per example rather than a scalar, so sum it
        # to obtain a scalar before back-propagating.
        l.sum().backward()
        # Normalize by the number of examples actually in this mini-batch:
        # the last batch is shorter when the dataset size is not a multiple
        # of batch_size, and dividing by batch_size would under-scale its step.
        sgd([w, b], lr, len(x))

    # Evaluate on the full dataset without tracking gradients.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

epoch 1, loss 0.034333
epoch 2, loss 0.000126
epoch 3, loss 0.000051
