# 简化版线性回归模型

In [58]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

## 0、生成数据集

In [59]:
w_dim = 2  # 输入数据维数
sample_nums = 1000  # 训练集样本数
lr = 0.05  # 学习率
batch_size = 10 # 小批量数据集样本数
num_epochs = 10  # 训练轮数

true_w = torch.rand(w_dim)
true_b = 10 * torch.randn(1).item()  # 0-10 的正态分布随机数
features, labels = d2l.synthetic_data(true_w, true_b, sample_nums)

## 1、读取数据集

In [60]:
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """构造一个PyTorch数据迭代器"""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

## 2、定义模型

In [61]:
# nn是神经网络的缩写
from torch import nn

net = nn.Sequential(nn.Linear(w_dim, 1))
net

Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
)

## 3、定义损失函数

In [62]:
# loss = nn.MSELoss()
loss = nn.HuberLoss()

## 4、定义优化算法

In [63]:
trainer = torch.optim.SGD(net.parameters(), lr=lr)

## 5、初始化参数

In [64]:
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)

tensor([0.])

## 6、训练模型

In [65]:
data_iter = load_array((features, labels), batch_size)

for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X) ,y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')

b = net[0].bias.data
w = net[0].weight.data

print(f"数据集中的：true_w: {true_w}, true_b: {true_b}")
print(f"模型的训练值：w: {w}, b: {b}")
print('w的估计误差：', true_w - w.reshape(true_w.shape))
print('b的估计误差：', true_b - b)

epoch 1, loss 2.048909
epoch 2, loss 0.000992
epoch 3, loss 0.000046
epoch 4, loss 0.000046
epoch 5, loss 0.000046
epoch 6, loss 0.000046
epoch 7, loss 0.000046
epoch 8, loss 0.000046
epoch 9, loss 0.000046
epoch 10, loss 0.000046
数据集中的：true_w: tensor([0.6566, 0.1733]), true_b: 7.558106780052185
模型的训练值：w: tensor([[0.6566, 0.1731]]), b: tensor([7.5584])
w的估计误差： tensor([6.1929e-05, 2.1994e-04])
b的估计误差： tensor([-0.0003])
