In [28]:
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l

In [29]:
# Ground-truth parameters of the linear model that the synthetic data is drawn from.
gt_w: torch.Tensor = torch.tensor([2.0, -3.4])  # ground-truth weight vector
gt_b: float = 4.2  # ground-truth bias

# Generate the dataset with the d2l helper (third argument 1000 is
# presumably the number of examples -- confirm against d2l.synthetic_data).
features, labels = d2l.synthetic_data(gt_w, gt_b, 1000)
features, labels

(tensor([[-1.0531,  0.4850],
         [-0.0721,  0.3969],
         [ 0.5090, -1.2865],
         ...,
         [-0.4344,  0.9563],
         [ 1.3744, -0.7106],
         [-0.3778,  0.3278]]),
 tensor([[ 4.5525e-01],
         [ 2.7029e+00],
         [ 9.5888e+00],
         [ 4.5188e+00],
         [ 3.3563e+00],
         [ 1.2784e+01],
         [ 6.8937e+00],
         [ 7.3823e+00],
         [ 3.6864e+00],
         [-4.9631e-01],
         [-2.5694e+00],
         [ 7.1929e+00],
         [ 5.8849e+00],
         [ 2.5163e+00],
         [ 2.3736e+00],
         [ 3.1744e+00],
         [ 6.1564e+00],
         [-3.7558e+00],
         [ 1.6456e+00],
         [ 4.4583e+00],
         [ 5.9423e+00],
         [ 5.2694e+00],
         [ 1.5171e+01],
         [-1.2821e+00],
         [ 1.2873e+01],
         [-4.4014e+00],
         [-4.2778e-01],
         [ 9.7529e+00],
         [ 4.6951e+00],
         [ 5.5260e+00],
         [ 1.3854e+01],
         [ 7.4662e-01],
         [ 4.8385e+00],
         [ 9.3596e

In [30]:
def load_array(data_arrays: tuple, batch_size: int, is_train: bool = True) -> data.DataLoader:
    '''Wrap in-memory tensors in a PyTorch ``DataLoader``.

    Args:
        data_arrays (tuple): Tensors to bundle together (for example the
            features and the labels). They are unpacked and passed as
            positional arguments to ``data.TensorDataset``.
        batch_size (int): Number of examples per mini-batch.
        is_train (bool, optional): Whether the data is a training set; the
            value is forwarded as the loader's ``shuffle`` flag. Defaults to
            True.

    Returns:
        data.DataLoader: A loader that iterates over the dataset in
        mini-batches.
    '''
    return data.DataLoader(
        data.TensorDataset(*data_arrays),  # star-unpack: each tensor becomes one dataset field
        batch_size=batch_size,
        shuffle=is_train,
    )

In [31]:
batch_size: int = 10  # mini-batch size
# Build the DataLoader over the whole dataset (is_train keeps its default of True).
data_iter: data.DataLoader = load_array((features, labels), batch_size=batch_size)

next(iter(data_iter))  # pull a single mini-batch from the loader to inspect it

[tensor([[ 1.4985, -0.1169],
         [-0.3527, -1.0539],
         [-0.5598,  1.7545],
         [ 0.6575, -0.4753],
         [ 0.1453,  0.4857],
         [-0.4754, -2.9944],
         [-0.6353, -1.1598],
         [-0.1008, -0.0435],
         [ 0.8502, -0.9469],
         [-0.5447, -0.9135]]),
 tensor([[ 7.5952],
         [ 7.0839],
         [-2.8979],
         [ 7.1235],
         [ 2.8420],
         [13.4373],
         [ 6.8751],
         [ 4.1303],
         [ 9.1211],
         [ 6.2038]])]

In [32]:
from torch import nn

# Linear model: a single linear layer with input dimension 2 and output dimension 1.
# nn.Sequential is a container that chains layers together; here it holds only this one layer.
net: nn.Module = nn.Sequential(nn.Linear(in_features=2, out_features=1))

In [33]:
# Initialize the linear layer's parameters through torch.nn.init instead of
# mutating `.data`: the init helpers run in no_grad mode, so autograd's
# bookkeeping stays consistent, whereas `.data` bypasses it.
nn.init.normal_(net[0].weight, mean=0.0, std=0.01)  # weights drawn from a normal with mean 0, std 0.01
nn.init.zeros_(net[0].bias)  # bias starts at zero
net

Sequential(
  (0): Linear(in_features=2, out_features=1, bias=True)
)

In [34]:
# Mean squared error loss.
loss: nn.Module = nn.MSELoss()
# Stochastic gradient descent over the parameters of `net`, with learning rate 0.03.
trainer: torch.optim.Optimizer = torch.optim.SGD(params=net.parameters(), lr=0.03)

In [35]:
num_epochs: int = 3  # number of passes over the training data
for epoch in range(num_epochs):
    for X, y in data_iter:  # one mini-batch per iteration
        l: torch.Tensor = loss(net(X), y)  # loss on the current mini-batch
        trainer.zero_grad()  # reset gradients; PyTorch accumulates them by default
        l.backward()  # backpropagate
        trainer.step()  # update the parameters
    # The end-of-epoch loss on the full dataset is only reported, never
    # backpropagated, so disable gradient tracking to avoid building a graph.
    with torch.no_grad():
        l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')  # report the loss after this epoch

epoch 1, loss 0.000163
epoch 2, loss 0.000104
epoch 3, loss 0.000104
