## 利用PyTorch从零实现线性回归

In [13]:
%matplotlib inline
import random
import torch

1. 人造数据<br>
构造一个人造数据集：真实参数为 $w=[2,-3.4]^\top$、$b=4.2$，并加入服从均值为 0、标准差为 0.01 的正态分布的噪声 $\epsilon$，即：
$$
y = Xw+b+\epsilon
$$

In [14]:
def synthetic_data(w, b, num_examples):
    """Generate a noisy linear data set following ``y = Xw + b + eps``.

    Args:
        w: Weight tensor; ``len(w)`` sets the number of feature columns.
        b: Bias added to every label.
        num_examples: Number of rows to sample.

    Returns:
        A pair ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` and
        ``y`` is returned as a column via ``reshape((-1, 1))``.
    """
    n_features = len(w)
    # Features are drawn from a normal distribution with mean 0 and std 1.
    X = torch.normal(0, 1, (num_examples, n_features))
    # Noise-free labels first, then Gaussian noise with std 0.01 on top.
    clean = torch.matmul(X, w) + b
    noise = torch.normal(0, 0.01, clean.shape)
    return X, (clean + noise).reshape((-1, 1))

设定真实参数并生成数据集

In [15]:
# Ground-truth parameters used to generate the synthetic data set.
true_w = torch.tensor([2,-3.4])
true_b = 4.2
# Draw 1000 samples from the noisy linear model defined by true_w and true_b.
features, labels = synthetic_data(true_w, true_b, 1000)
# features.shape is (1000, 2); labels.shape is (1000, 1)

In [16]:
# # Optional: visualise the second feature against the labels (uncomment to plot)
# import matplotlib.pyplot as plt
# plt.scatter(features[:,1].numpy(), labels.numpy(), s=10)

2. 定义一个生成小批量的函数<br>
`data_iter` 的输入为批量大小、特征矩阵和标签向量，每次迭代生成一个大小为 `batch_size` 的小批量（样本数不能被 `batch_size` 整除时，最后一个小批量会更小）

In [17]:
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    This is a generator function: each call shuffles the sample indices
    once with ``random.shuffle`` and then walks through them in strides
    of ``batch_size``.

    Args:
        batch_size: Number of samples per mini-batch.
        features: Indexable feature tensor; ``len(features)`` is the sample count.
        labels: Label tensor indexed with the same positions as ``features``.

    Yields:
        ``(features[idx], labels[idx])`` for each consecutive slice of the
        shuffled indices. The final batch is shorter when the sample count
        is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)  # random permutation of the sample positions
    for start in range(0, total, batch_size):
        # List slicing clamps at the end of the list, so the last slice
        # simply comes out shorter instead of running past the bound.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]
        

# Mini-batch size used by the demo below and by the training loop.
batch_size = 10

# # Alternative: read the generator with a for loop
# for X, y in data_iter(batch_size, features, labels):
#     print(X,'\n',y)
#     break # stop after printing a single mini-batch
# Create a generator and pull one mini-batch from it; as the last expression
# of the cell, the (features, labels) pair is displayed as the cell output.
g = data_iter(batch_size, features, labels)
next(g)

(tensor([[-2.6238, -1.1909],
         [-1.3182, -0.1359],
         [-0.1220,  0.5916],
         [-0.0552, -0.2519],
         [-0.7003, -0.3164],
         [ 0.5688, -0.0320],
         [ 1.3282,  0.0398],
         [-0.6529, -0.5951],
         [-0.2985, -0.4908],
         [-0.4694, -0.5937]]),
 tensor([[3.0169],
         [2.0194],
         [1.9531],
         [4.9360],
         [3.8844],
         [5.4443],
         [6.7189],
         [4.9108],
         [5.2679],
         [5.2893]]))

3. 初始化模型参数

In [18]:
# Trainable parameters; both track gradients so that backward() fills .grad.
w = torch.normal(0,0.01,size=(2,1), requires_grad=True) # weights of shape (2, 1) drawn from a normal distribution (mean 0, std 0.01)
b = torch.zeros(1, requires_grad=True) # bias initialised to zero

4. 定义模型

In [19]:
def linreg(X, w, b):
    """Linear regression model: return ``Xw + b``.

    Args:
        X: Input features.
        w: Weights multiplied with ``X`` via ``torch.matmul``.
        b: Bias added to the product.
    """
    projection = torch.matmul(X, w)
    return projection + b

5. 定义损失函数

In [20]:
def squared_loss(y_hat, y):
    """Element-wise halved squared error; no mean or sum is taken.

    Args:
        y_hat: Predictions; the result has this shape.
        y: Targets; reshaped to ``y_hat.shape`` before subtracting, so a
            row vector and a column vector can be mixed safely.

    Returns:
        ``(y_hat - y) ** 2 / 2`` for every element.
    """
    target = y.reshape(y_hat.shape)
    residual = y_hat - target
    return residual ** 2 / 2

6. 定义优化算法

In [21]:
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic gradient descent step in place.

    Args:
        params (list): Tensors to update (e.g. ``[w, b]``); each must
            already carry a populated ``.grad``.
        lr (float): Learning rate.
        batch_size (int): Divisor applied to the gradient. The loss is
            summed rather than averaged before ``backward()``, so the
            averaging happens here.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for p in params:
            step = lr * p.grad / batch_size
            p.sub_(step)
            # Clear the gradient so the next backward() starts from zero.
            p.grad.zero_()

7. 训练过程

In [22]:
# Training configuration.
lr = 0.03  # learning rate passed to sgd
num_epochs = 3  # number of full passes over the data set
net = linreg  # model function
loss = squared_loss  # per-sample loss function

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels): # the 1000 samples are split at random into 100 mini-batches
        # Forward pass
        y_pred = net(X,w,b)
        l = loss(y_pred, y) # per-sample loss of the mini-batch; shape (batch_size, 1)
        
        # Backward pass & update
        # l is not a scalar, so it is summed before calling backward()
        l.sum().backward()
        # NOTE(review): sgd divides by the nominal batch_size; a shorter final
        # batch would be under-weighted. Not triggered here (1000 % 10 == 0).
        sgd([w,b], lr, batch_size)
    # After each epoch, report progress on the whole training set
    with torch.no_grad(): # no gradients needed for evaluation
        train_l = loss(net(features,w,b), labels) # per-sample training loss over all 1000 samples
        print(f'epoch {epoch+1}, train_loss {train_l.mean():f}')

epoch 1, train_loss 0.033335
epoch 2, train_loss 0.000123
epoch 3, train_loss 0.000055


In [24]:
# Compare the true parameters with the learned ones to judge how well training worked.
# w is reshaped to true_w's shape so the two can be subtracted element-wise.
print(f'w的估计误差：{true_w - w.reshape(true_w.shape)}')
print(f'b的估计误差：{true_b - b}')

w的估计误差：tensor([ 0.0001, -0.0002], grad_fn=<SubBackward0>)
b的估计误差：tensor([5.6267e-05], grad_fn=<RsubBackward1>)
