In [17]:
%matplotlib inline
import random
import torch


我们（从上帝视角）设定真实的线性模型参数 $\boldsymbol{w} = [2, -3.4]^T$、$b = 4.2$，并加入噪声项 $\epsilon$ 来生成数据集及其标签：
$$
\boldsymbol{y} = \boldsymbol{X}\boldsymbol{w} + b + \epsilon
$$

In [18]:
def synthetic_data(w, b, num_of_examples):
    """
    Generate a synthetic linear-regression dataset: y = Xw + b + noise.

    Features are drawn from a normal distribution with mean 0 and standard
    deviation 1; the labels are perturbed with normal noise of mean 0 and
    standard deviation 0.01.

    :param w: true weights; ``len(w)`` sets the number of feature columns
    :param b: true bias
    :param num_of_examples: number of examples (rows) to generate
    :return: the feature matrix X with ``num_of_examples`` rows and ``len(w)``
        columns, and the labels reshaped into a single column
    """
    num_of_features = len(w)
    X = torch.normal(0, 1, (num_of_examples, num_of_features))
    clean_y = torch.matmul(X, w) + b
    # The noise is drawn after X, so the random draws happen in a fixed order.
    noise = torch.normal(0, 0.01, clean_y.shape)
    noisy_y = clean_y + noise
    return X, noisy_y.reshape((-1, 1))

# Ground-truth parameters of the linear model used to synthesize the data.
true_w, true_b = torch.tensor([2, -3.4]), 4.2
# Build a dataset of 1000 examples from those parameters.
features, labels = synthetic_data(w=true_w, b=true_b, num_of_examples=1000)


In [19]:
def data_iter(batch_size, features, labels):
    """
    Iterate over the dataset in shuffled mini-batches.

    This is a generator: the example indices are shuffled once with
    ``random.shuffle`` and then consumed in consecutive chunks of
    ``batch_size``. The final chunk is shorter when the number of examples is
    not a multiple of ``batch_size``.

    :param batch_size: number of examples per mini-batch
    :param features: feature matrix, indexed along its first dimension
    :param labels: labels, indexed along its first dimension with the same indices
    :return: yields ``(features_batch, labels_batch)`` pairs
    """
    total = len(features)  # total number of examples
    order = list(range(total))  # one index per example
    random.shuffle(order)  # visit the examples in random order
    for start in range(0, total, batch_size):
        # List slicing clamps at the end of the list, so the last batch is
        # simply shorter; no explicit bound check is needed.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

batch_size = 10

# Peek at a single shuffled mini-batch to sanity-check the iterator.
X, y = next(data_iter(batch_size, features, labels))
print(X, '\n', y)

tensor([[ 0.1424,  1.1193],
        [ 0.5143, -1.8629],
        [-0.8848,  1.1094],
        [-0.7371,  0.6283],
        [-0.2826,  0.0687],
        [-1.6677,  0.7053],
        [-0.0662,  0.6246],
        [ 2.0205,  0.3608],
        [-1.1036,  1.4053],
        [ 0.6683, -0.0991]]) 
 tensor([[ 0.6868],
        [11.5447],
        [-1.3409],
        [ 0.5956],
        [ 3.3938],
        [-1.5304],
        [ 1.9544],
        [ 7.0125],
        [-2.7790],
        [ 5.8622]])


初始化模型参数

In [20]:
# Trainable parameters: the weights start as small random values (std 0.01)
# and the bias starts at zero; both are marked as requiring gradients.
w = torch.normal(0, 0.01, size=(2, 1)).requires_grad_(True)
b = torch.zeros(1).requires_grad_(True)

定义模型

In [21]:
def linreg(X, w, b):
    """
    Linear regression model: y_hat = Xw + b.

    :param X: feature matrix
    :param w: weights
    :param b: bias, added to the matrix product
    :return: the predictions y_hat
    """
    weighted_sum = X @ w
    return weighted_sum + b

In [22]:
def squared_loss(y_hat, y):
    """
    Element-wise half squared error: (y_hat - y)^2 / 2.

    No reduction is applied here; the result has the same shape as ``y_hat``,
    so the caller decides whether to sum or average it.

    :param y_hat: predicted values
    :param y: actual values; reshaped to the shape of ``y_hat`` before subtracting
    :return: tensor of per-element losses
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2

In [23]:
def sgd(params, lr, batch_size):
    """
    Mini-batch stochastic gradient descent step.

    Every parameter is updated in place by ``lr * grad / batch_size`` and its
    gradient is then reset to zero.

    :param params: list of parameters to update (e.g. w and b)
    :param lr: learning rate
    :param batch_size: batch size used to scale the gradient
    :return: None; the parameters are modified in place
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            step = lr * p.grad / batch_size
            p.sub_(step)
            # Clear the gradient so the next backward pass does not add to
            # the values from this one.
            p.grad.zero_()

训练过程

In [24]:
lr = 0.03  # learning rate
num_epochs = 3  # number of full passes over the dataset
net = linreg  # the model is linreg
loss = squared_loss  # the loss function

# Train with mini-batch SGD: for every batch, compute the loss, backpropagate,
# then update w and b in place.
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)  # l holds one loss per example in the batch, not a scalar
        l.sum().backward()  # sum the elements of l and compute the gradients w.r.t. [w, b]
        sgd([w, b], lr, batch_size)  # update the parameters using their gradients
    # After each epoch, evaluate on the whole dataset without tracking
    # gradients and report the mean loss.
    # NOTE(review): `train_1` looks like a typo for `train_l` - confirm before renaming.
    with torch.no_grad():
        train_1 = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_1.mean()):f}')

epoch 1, loss 0.027689
epoch 2, loss 0.000093
epoch 3, loss 0.000052
