In [5]:
%matplotlib inline
import random
import torch


从上帝视角出发，使用已知的线性模型参数 $\boldsymbol{w} = [2, -3.4]^\top$、$b = 4.2$ 和噪声项 $\epsilon$ 来生成数据集及其标签：
$$
\boldsymbol{y} = \boldsymbol{X}\boldsymbol{w} + b + \epsilon
$$

In [6]:
def synthetic_data(w, b, num_of_examples, noise_std=0.01):
    """
    Generate a synthetic linear-regression dataset: y = Xw + b + noise.

    Features are sampled from a standard normal distribution; labels are the
    linear response perturbed by zero-mean Gaussian noise.

    :param w: true weights; a tensor or any numeric sequence accepted by
        ``torch.as_tensor``. It is converted to the dtype of the generated
        features so integer / float64 weights (or a plain list) do not
        trigger a dtype mismatch in ``matmul``.
    :param b: true bias (scalar)
    :param num_of_examples: number of samples (rows) to generate
    :param noise_std: standard deviation of the Gaussian label noise
        (defaults to 0.01, the value that was previously hard-coded)
    :return: tuple ``(features, labels)``; features has shape
        ``(num_of_examples, len(w))`` and labels is reshaped to a column,
        ``(num_of_examples, 1)``
    """
    X = torch.normal(0, 1, (num_of_examples, len(w)))
    # Align the weights with the feature dtype; a no-op when w already matches.
    w = torch.as_tensor(w, dtype=X.dtype)
    y = torch.matmul(X, w) + b
    y += torch.normal(0, noise_std, y.shape)
    return X, y.reshape((-1, 1))

# Seed every RNG this notebook draws from so that Restart & Run All
# regenerates the same dataset and the same shuffled mini-batches.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Ground-truth parameters used to synthesise the dataset.
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)


In [7]:
def data_iter(batch_size, features, labels):
    """
    Yield shuffled mini-batches of (features, labels).

    The sample indices are shuffled once with ``random.shuffle`` and then
    consumed in consecutive slices of ``batch_size``; the final batch is
    shorter when the sample count is not a multiple of ``batch_size``.

    :param batch_size: number of samples per mini-batch
    :param features: feature matrix, indexed along its first dimension
    :param labels: labels, indexed with the same indices as ``features``
    :return: generator of ``(features_batch, labels_batch)`` pairs
    """
    total = len(features)  # total number of samples
    order = list(range(total))  # one index per sample
    random.shuffle(order)  # randomise the visiting order in place
    for start in range(0, total, batch_size):
        # List slicing clamps at the end, so the last batch may be short.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]


batch_size = 10

# Peek at the first mini-batch only; nothing is printed if the iterator is empty.
first_batch = next(data_iter(batch_size, features, labels), None)
if first_batch is not None:
    X, y = first_batch
    print(X, '\n', y)

tensor([[ 0.7455,  0.4269],
        [-0.8024,  0.3146],
        [-0.4798,  0.2780],
        [-1.4676,  0.0534],
        [-0.3230, -0.9755],
        [ 0.4927,  0.9904],
        [-0.1850, -0.7086],
        [-1.2217, -0.1348],
        [ 0.6064,  1.0651],
        [ 1.1096, -0.7164]]) 
 tensor([[4.2293],
        [1.5239],
        [2.2908],
        [1.0918],
        [6.8707],
        [1.8154],
        [6.2306],
        [2.1976],
        [1.8003],
        [8.8627]])
