In [1]:
import torch
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
import random

## 用到的d2l code

In [2]:
def use_svg_display():
    """Ask the inline Matplotlib backend to emit figures in SVG format."""
    render_formats = ('svg',)
    backend_inline.set_matplotlib_formats(*render_formats)


def set_figsize(figsize=(3.5, 2.5)):
    """Switch to SVG output and set Matplotlib's default figure size.

    Args:
        figsize: ``(width, height)`` stored under ``rcParams['figure.figsize']``.
    """
    use_svg_display()
    plt.rcParams.update({'figure.figsize': figsize})


def synthetic_data(w, b, num_examples, noise_std=0.01):  #@save
    """Generate a synthetic linear-regression dataset ``y = Xw + b + noise``.

    Args:
        w: Weight tensor; ``len(w)`` determines the number of feature columns.
        b: Bias added to every example.
        num_examples: Number of rows to sample.
        noise_std: Standard deviation of the zero-mean Gaussian noise added to
            the labels. Defaults to 0.01, the value that used to be hard-coded.

    Returns:
        A tuple ``(X, y)``: ``X`` has shape ``(num_examples, len(w))`` and
        ``y`` is reshaped into a column with one label per row.
    """
    # NOTE: torch.normal takes the *standard deviation* (not the variance)
    # as its second argument.
    # Features are drawn from a standard normal distribution (mean 0, std 1).
    X = torch.normal(0, 1, (num_examples, len(w)))
    # Noise-free targets of the linear model.
    y = torch.matmul(X, w) + b
    # Perturb the targets with Gaussian noise of the requested std.
    y += torch.normal(0, noise_std, y.shape)
    return X, y.reshape((-1, 1))

def data_iter(batch_size, features, labels):
    """Yield ``(features, labels)`` minibatches in a random order.

    The example indices are shuffled once with the global ``random`` module,
    then walked in strides of ``batch_size``. The final batch is shorter when
    the number of examples is not a multiple of ``batch_size``.
    """
    total = len(features)
    order = list(range(total))
    random.shuffle(order)  # examples are visited in no particular order
    for start in range(0, total, batch_size):
        # List slicing clamps at the end, so the last batch may be short.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

def linreg(X, w, b):  #@save
    """Linear regression model: return ``Xw + b``."""
    projection = X @ w
    return projection + b

def squared_loss(y_hat, y):  #@save
    """Element-wise halved squared error between predictions and targets.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting, so the
    result has the same shape as ``y_hat`` (no reduction is applied).
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

def sgd(params, lr, batch_size):  #@save
    """Apply one minibatch SGD step in place and clear the gradients.

    Each parameter is decreased by ``lr * grad / batch_size``; its ``.grad``
    buffer is then zeroed so the next backward pass starts from scratch.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for tensor in params:
            step = lr * tensor.grad / batch_size
            tensor.sub_(step)
            tensor.grad.zero_()  # gradients accumulate unless reset manually

## 声明若干训练参数

In [3]:
# Minibatch size used when iterating over the training data.
batch_size=32
true_w = torch.tensor([2, -3.4]) # ground-truth weights
true_b = 4.2 # ground-truth bias
features, labels = synthetic_data(true_w, true_b, 1000) # randomly generate 1000 training examples and their labels

In [4]:
lrs = [0.003, 0.03, 0.3, 0.0003] # learning rates to compare
num_epochs = 3 # number of training epochs
net = linreg # model: the linear regression function defined earlier
loss = squared_loss # loss function: the squared loss defined earlier

## 部分课后作业代码

In [5]:
# Homework 3.2.9: observe how the learning rate affects how fast the loss drops.
# The model is retrained from scratch once per learning rate.
for lr in lrs:
    print(f"training with learning rate: {lr}")
    # Fresh parameters for every run. Weights are drawn from a normal
    # distribution with mean 0 and standard deviation 0.01; the shape is
    # derived from the data (one row per feature column) instead of being
    # hard-coded to 2.
    w = torch.normal(0, 0.01, size=(features.shape[1], 1), requires_grad=True)
    # The bias starts at zero (a single-element tensor).
    b = torch.zeros(1, requires_grad=True)
    for epoch in range(num_epochs):
        for X, y in data_iter(batch_size, features, labels):
            l = loss(net(X, w, b), y)  # minibatch loss for X and y
            # l holds one value per example rather than a scalar, so its
            # elements are summed before computing gradients w.r.t. [w, b].
            l.sum().backward()
            sgd([w, b], lr, batch_size)  # update the parameters using their gradients
        # Evaluation only: no gradients are needed for the reported loss.
        with torch.no_grad():
            train_l = loss(net(features, w, b), labels)  # loss over all examples
            print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')

training with learning rate: 0.003
epoch 1, loss 13.915562
epoch 2, loss 11.504158
epoch 3, loss 9.510802
training with learning rate: 0.03
epoch 1, loss 2.439851
epoch 2, loss 0.354408
epoch 3, loss 0.051464
training with learning rate: 0.3
epoch 1, loss 0.000051
epoch 2, loss 0.000050
epoch 3, loss 0.000050
training with learning rate: 0.0003
epoch 1, loss 16.544933
epoch 2, loss 16.233500
epoch 3, loss 15.927928
