In [1]:
import random
import torch

In [2]:
# Generate a synthetic linear-regression dataset
def synthetic_data(w, b, num_examples):
    """Sample features and noisy labels from the linear model ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns
            (this notebook passes a 1-D tensor).
        b: Bias added to every label.
        num_examples: Number of rows to generate.

    Returns:
        A pair ``(features, labels)``. ``features`` has shape
        ``(num_examples, len(w))``; ``labels`` is reshaped to a column with
        one entry per row.
    """
    n_features = len(w)
    inputs = torch.normal(0, 1, (num_examples, n_features))  # standard-normal features
    clean_targets = torch.matmul(inputs, w) + b  # exact linear response
    noise = torch.normal(0, 0.01, clean_targets.shape)  # Gaussian noise, std 0.01
    noisy_targets = clean_targets + noise
    return inputs, noisy_targets.reshape((-1, 1))  # flatten-safe: always a column

In [3]:
true_w = torch.tensor([2, -3.4])  # ground-truth weights the model should recover
true_b = 4.2  # ground-truth bias
# Draw 1000 noisy samples from the ground-truth linear model.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

In [4]:
# Yield shuffled mini-batches
def data_iter(batch_size, features, labels):
    """Generate ``(features, labels)`` mini-batches in a freshly shuffled order.

    Args:
        batch_size: Maximum number of examples per batch; the final batch is
            shorter when the dataset size is not a multiple of it.
        features: Tensor of examples; ``len(features)`` is the dataset size.
        labels: Tensor indexed with the same row indices as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for consecutive slices of a random
        permutation of the row indices.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)  # a new permutation on every call
    for start in range(0, total, batch_size):
        # List slicing clamps at the end, so the last batch is simply shorter.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

In [5]:
batch_size = 10
# Peek at the first mini-batch only; the generator is not advanced any further.
X, y = next(data_iter(batch_size, features, labels))
print(X, '\n', y)

tensor([[-0.8822,  0.0516],
        [ 1.3474,  0.6380],
        [-1.1746,  0.1782],
        [ 1.4144,  0.1014],
        [ 0.5482,  0.8050],
        [ 0.4950, -1.4595],
        [ 1.5748, -0.0922],
        [-0.5149,  1.0816],
        [ 0.3417,  1.3964],
        [-1.5793, -0.0324]]) 
 tensor([[ 2.2598],
        [ 4.7257],
        [ 1.2292],
        [ 6.6858],
        [ 2.5644],
        [10.1381],
        [ 7.6863],
        [-0.5146],
        [ 0.1171],
        [ 1.1498]])


In [6]:
# Initialise the model parameters
# requires_grad=True makes autograd track operations on these tensors.
w = torch.normal(mean=0.0, std=0.01, size=(2, 1), requires_grad=True)  # small random weights
b = torch.zeros((1,), requires_grad=True)  # bias starts at zero

In [7]:
# Linear regression model
def linreg(X, w, b):
    """Return the linear model's predictions ``X @ w + b``.

    Args:
        X: Feature tensor.
        w: Weight tensor, matrix-multiplied with ``X``.
        b: Bias, added (with broadcasting) to the product.

    Returns:
        The tensor ``matmul(X, w) + b``.
    """
    return X @ w + b

In [8]:
# Squared-error loss
def square_loss(y_hat, y):
    """Element-wise halved squared error ``(y_hat - y) ** 2 / 2``.

    Args:
        y_hat: Predictions.
        y: Targets; reshaped to ``y_hat``'s shape before subtracting so the
            two line up element by element instead of broadcasting.

    Returns:
        A tensor with the shape of ``y_hat``; no reduction is applied.
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2

In [9]:
# Optimisation algorithm
def sgd(params, lr, batch_size):
    """Apply one mini-batch SGD step in place, then reset the gradients.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.
    Parameters whose ``.grad`` is ``None`` (they took no part in the last
    backward pass) are left untouched instead of raising a ``TypeError``.

    Args:
        params: Iterable of tensors created with ``requires_grad=True``.
        lr: Learning rate.
        batch_size: Divisor applied to the accumulated gradient; pass the
            number of examples whose losses were summed before ``backward()``.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # Nothing was back-propagated into this parameter; skip it.
                continue
            param -= lr * param.grad / batch_size
            # PyTorch accumulates gradients across backward() calls, so clear
            # them here ready for the next step.
            param.grad.zero_()

In [10]:
# Training loop (from-scratch implementation)
lr = 0.03
num_epochs = 3
net = linreg
loss = square_loss

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        # Per-example losses: one value per row of X, not yet a scalar.
        batch_loss = loss(net(X, w, b), y)
        # backward() needs a scalar, so sum the per-example losses first.
        # It fills the .grad attribute of every tensor created with
        # requires_grad=True (w and b) via the chain rule.
        batch_loss.sum().backward()
        # Normalise by the number of rows actually in this batch: data_iter
        # yields a shorter final batch when the dataset size is not a
        # multiple of batch_size.
        sgd([w, b], lr, X.shape[0])
    # Report the mean loss over the whole dataset; no gradients are needed.
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

epoch 1, loss 0.036897
epoch 2, loss 0.000129
epoch 3, loss 0.000049


In [11]:
# Compare the learned parameters with the ground truth used to generate the data.
w_error = true_w - w.reshape(true_w.shape)  # w is a column; match true_w's shape first
b_error = true_b - b
print(f' w 的估计误差: {w_error}')
print(f' b 的估计误差: {b_error}')

 w 的估计误差: tensor([0.0015, 0.0003], grad_fn=<SubBackward0>)
 b 的估计误差: tensor([0.0009], grad_fn=<RsubBackward1>)


In [12]:
# 线性回归的简洁实现
import numpy as np
import torch
from torch.utils import data

In [13]:
true_w = torch.tensor([2, -3.4])  # ground-truth weights the model should recover
true_b = 4.2  # ground-truth bias
# Draw a new set of 1000 noisy samples from the ground-truth linear model.
features, labels = synthetic_data(w=true_w, b=true_b, num_examples=1000)

In [14]:
# Read the data through the framework's built-in data utilities
def load_array(data_arrays, batch_size, is_train=True):
    """Build a PyTorch ``DataLoader`` over the given tensors.

    Args:
        data_arrays: Tuple of tensors, unpacked into a ``TensorDataset``.
        batch_size: Number of examples per batch.
        is_train: Passed as ``shuffle``; when true the loader shuffles the
            examples.

    Returns:
        A ``DataLoader`` yielding batches of the wrapped tensors.
    """
    return data.DataLoader(
        data.TensorDataset(*data_arrays),
        batch_size=batch_size,
        shuffle=is_train,
    )

In [15]:
batch_size = 10
# NOTE(review): this rebinds `data_iter`, which earlier in the notebook names
# the hand-written generator function; cells that call data_iter(...) will
# fail if they are re-run after this one.
data_iter = load_array(data_arrays=(features, labels), batch_size=batch_size)
# Display one batch drawn from a fresh iterator over the loader.
next(iter(data_iter))

[tensor([[ 2.6302, -1.5749],
         [-1.0621,  0.3685],
         [ 1.4777,  0.8144],
         [ 0.3532, -0.9794],
         [-2.3617, -0.9761],
         [-0.9843,  0.0155],
         [ 1.8466,  0.8107],
         [-1.3918, -0.5698],
         [ 1.0957,  0.6817],
         [-0.0581,  1.2857]]),
 tensor([[14.8155],
         [ 0.8256],
         [ 4.3872],
         [ 8.2313],
         [ 2.8038],
         [ 2.1724],
         [ 5.1408],
         [ 3.3504],
         [ 4.0927],
         [-0.2775]])]

In [16]:
# 使用框架的预定义好的层
from torch import nn

# A Sequential container can hold several layers; here it wraps a single
# linear layer with 2 input features and 1 output.
net = nn.Sequential(nn.Linear(in_features=2, out_features=1))

In [17]:
# Initialise the model parameters
linear_layer = net[0]  # the only layer in the container
linear_layer.weight.data.normal_(0, 0.01)  # in-place draw from N(0, 0.01^2)
linear_layer.bias.data.fill_(0)  # in-place zero fill; its return value is the cell output

tensor([0.])

In [18]:
# Mean squared error loss ('mean' is the default reduction, spelled out here)
loss = nn.MSELoss(reduction='mean')

In [19]:
# Create the SGD optimiser and hand it every learnable parameter of the
# network (net.parameters()) so that it can update them.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.3)

In [20]:
# Training loop (concise implementation)
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        batch_loss = loss(net(X), y)
        trainer.zero_grad()  # clear gradients accumulated by the previous step
        batch_loss.backward()
        trainer.step()  # update the parameters from the current gradients
    # Evaluation only: no_grad avoids building an autograd graph over the
    # whole dataset just to print a number.
    with torch.no_grad():
        epoch_loss = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {epoch_loss:f}')

epoch 1, loss 0.000136
epoch 2, loss 0.000117
epoch 3, loss 0.000115
