In [1]:
import torch
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
import random

## 使用到的部分d2l code

In [2]:
def use_svg_display():
    """Ask the inline matplotlib backend to emit figures in the 'svg' format."""
    figure_format = 'svg'
    backend_inline.set_matplotlib_formats(figure_format)


def set_figsize(figsize=(3.5, 2.5)):
    """Enable SVG figure output and set matplotlib's default figure size.

    Args:
        figsize: value assigned to matplotlib's ``'figure.figsize'`` rcParam.
    """
    # Switch the output format first, then record the requested size.
    use_svg_display()
    rc = plt.rcParams
    rc['figure.figsize'] = figsize


def synthetic_data(w, b, num_examples, noise_std=0.01):  #@save
    """Generate a synthetic linear-regression dataset ``y = Xw + b + noise``.

    Args:
        w: 1-D weights (tensor or sequence of numbers); its length sets the
            number of feature columns.
        b: bias added to every label.
        num_examples: number of rows to sample.
        noise_std: standard deviation of the zero-mean Gaussian noise added to
            the labels. Defaults to 0.01, the value that used to be hard-coded.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(num_examples, len(w))`` and ``y``
        is a column vector of shape ``(num_examples, 1)``.
    """
    # Features come from a standard normal. Note that the second argument of
    # torch.normal is the standard deviation, not the variance.
    X = torch.normal(0, 1, (num_examples, len(w)))
    # Coerce w to X's dtype so plain lists and integer/double tensors work too.
    w = torch.as_tensor(w, dtype=X.dtype)
    # Noise-free labels for every row of X.
    y = torch.matmul(X, w) + b
    # Perturb the labels with Gaussian noise of the same shape as y.
    y += torch.normal(0, noise_std, y.shape)
    # Return the labels as a column vector.
    return X, y.reshape((-1, 1))

def data_iter(batch_size, features, labels):
    """Yield ``(features, labels)`` minibatches in a random order.

    The example indices are shuffled once with ``random.shuffle`` and then
    consumed in consecutive chunks of ``batch_size``; the last chunk is
    smaller when ``batch_size`` does not divide the number of examples.

    Args:
        batch_size: number of examples per minibatch.
        features: indexable collection of examples; its ``len`` gives the
            number of examples.
        labels: labels indexed the same way as ``features``.
    """
    total = len(features)
    order = list(range(total))
    # Visit the examples in random order, without any fixed sequence.
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing past the end of the list is clamped automatically, so the
        # final batch simply contains whatever indices remain.
        picked = torch.tensor(order[start:start + batch_size])
        yield features[picked], labels[picked]

def linreg(X, w, b):  #@save
    """Linear regression model: return the matrix product ``Xw`` plus ``b``."""
    return X @ w + b

def squared_loss(y_hat, y):  #@save
    """Element-wise squared error divided by two.

    ``y`` is reshaped to ``y_hat``'s shape before subtracting; the result is
    not reduced, so it has the same shape as ``y_hat``.
    """
    residual = y_hat - y.reshape(y_hat.shape)
    return residual ** 2 / 2

def sgd(params, lr, batch_size):  #@save
    """Apply one minibatch SGD step in place, then zero the gradients.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.
    Parameters whose ``grad`` is ``None`` (for example, ones that took no part
    in the last backward pass) are left untouched instead of raising.

    Args:
        params: iterable of tensors to update.
        lr: learning rate.
        batch_size: divisor applied to the gradient before the update.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # Nothing was back-propagated to this parameter; skip it.
                continue
            param -= lr * param.grad / batch_size
            # Gradients accumulate across backward calls, so reset manually.
            param.grad.zero_()

## 作业

In [3]:
# Hyper-parameters used by the data loader, optimizer and training loops.
BATCH_SIZE = 32  # examples per minibatch
LR = 0.01        # learning rate
EPOCH = 15       # number of passes over the dataset

In [4]:
# Dataset
class RegDataSets(torch.utils.data.Dataset):
    """Dataset of synthetic linear-regression samples.

    The features and labels are generated once, at construction time, by
    ``synthetic_data(true_w, true_b, sample_size)`` and kept on the instance.
    """

    def __init__(self, true_w, true_b, sample_size):
        """Generate ``sample_size`` samples from the given weights and bias."""
        generated = synthetic_data(true_w, true_b, sample_size)
        self.features, self.labels = generated

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        sample = (self.features[idx], self.labels[idx])
        return sample

    def __len__(self):
        """Return the number of stored labels."""
        return len(self.labels)


# Data loader: 1000 synthetic samples generated from w = [2, -3.4] and b = 4.2,
# served in shuffled minibatches of BATCH_SIZE.
reg_data_loader = torch.utils.data.DataLoader(
    dataset=RegDataSets(true_w=torch.tensor([2, -3.4]), true_b=4.2, sample_size=1000),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# Model: a single linear layer with 2 input features and 1 output.
net = torch.nn.Sequential(torch.nn.Linear(in_features=2, out_features=1))
# Optimizer: Adam over all of the model's parameters.
optim = torch.optim.Adam(params=net.parameters(), lr=LR)
# Loss: MSELoss with its default reduction.
loss = torch.nn.MSELoss()



In [5]:
# Training loop for the loss defined above (mean loss).
# total_loss adds up the loss value of every minibatch, so the figure printed
# per epoch is the sum of the batch losses rather than their average.
for epoch in range(EPOCH):
    total_loss = 0
    for X, y in reg_data_loader:
        # Clear gradients left over from the previous step.
        net.zero_grad()
        y_hat = net(X)
        l = loss(y_hat, y)
        l.backward()
        optim.step()
        # The loss value was computed before the step, so reading it here
        # yields the same number as reading it before backward().
        total_loss += l.item()
    print(f'epoch {epoch + 1}, loss {total_loss:f}')

epoch 1, loss 826.020897
epoch 2, loss 681.324340
epoch 3, loss 534.184391
epoch 4, loss 423.413263
epoch 5, loss 338.355702
epoch 6, loss 267.880443
epoch 7, loss 211.172677
epoch 8, loss 161.417068
epoch 9, loss 124.391500
epoch 10, loss 93.092874
epoch 11, loss 70.407469
epoch 12, loss 52.231450
epoch 13, loss 37.931536
epoch 14, loss 27.824126
epoch 15, loss 20.043169


In [6]:
# Show every parameter of the first layer, one per line.
print(*net[0].parameters(), sep='\n')

Parameter containing:
tensor([[ 1.9855, -3.0008]], requires_grad=True)
Parameter containing:
tensor([3.6033], requires_grad=True)


In [7]:
# homework
# Start again from a fresh model, this time with a summed (not averaged) MSE loss.
net = torch.nn.Sequential(torch.nn.Linear(in_features=2, out_features=1))
loss = torch.nn.MSELoss(reduction='sum')
optim = torch.optim.Adam(params=net.parameters(), lr=LR)
# Training loop with the sum loss.
# total_loss adds up the loss value of every minibatch within an epoch.
for epoch in range(EPOCH):
    total_loss = 0
    for X, y in reg_data_loader:
        # Clear gradients left over from the previous step.
        net.zero_grad()
        y_hat = net(X)
        l = loss(y_hat, y)
        l.backward()
        optim.step()
        # The loss value was computed before the step, so reading it here
        # yields the same number as reading it before backward().
        total_loss += l.item()
    print(f'epoch {epoch + 1}, loss {total_loss:f}')

epoch 1, loss 32405.973846
epoch 2, loss 27342.004089
epoch 3, loss 23015.189224
epoch 4, loss 19267.572769
epoch 5, loss 16074.872162
epoch 6, loss 13361.552704
epoch 7, loss 11067.781307
epoch 8, loss 9097.136734
epoch 9, loss 7449.881424
epoch 10, loss 6063.423054
epoch 11, loss 4909.878922
epoch 12, loss 3951.900068
epoch 13, loss 3159.437819
epoch 14, loss 2511.264574
epoch 15, loss 1981.647661


In [8]:
# homework
# Display the gradient currently stored on the first layer's weight.
# This attribute can be used later to implement efficient gradient clipping.
net[0].weight.grad

tensor([[-8.3320, 15.8991]])

In [9]:
# Show every parameter of the first layer, one per line.
print(*net[0].parameters(), sep='\n')

Parameter containing:
tensor([[ 1.9769, -3.0305]], requires_grad=True)
Parameter containing:
tensor([2.9373], requires_grad=True)


In [10]:
# Access the model's layers and parameters through their attributes.
# Every child layer that exposes reset_parameters() is re-initialised.
resettable = [layer for layer in net.children() if hasattr(layer, 'reset_parameters')]
for layer in resettable:
    layer.reset_parameters()
# Show the first layer's parameters after the reset, one per line.
print(*net[0].parameters(), sep='\n')

Parameter containing:
tensor([[0.3032, 0.5353]], requires_grad=True)
Parameter containing:
tensor([-0.2904], requires_grad=True)
