In [8]:
import numpy as np
import torch
import torch.nn as nn

In [4]:
num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)

In [5]:
import torch.utils.data as Data

batch_size = 10
# 将训练数据的特征和标签组合
dataset = Data.TensorDataset(features, labels)
# 随机读取小批量
data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)

In [6]:
for X, y in data_iter:
    print(X, y)
    break

tensor([[-0.2246, -0.8479],
        [ 1.7713, -0.4043],
        [ 1.0794, -1.1410],
        [ 0.4818, -0.2696],
        [-0.6499, -0.3623],
        [-1.1859, -1.4050],
        [ 0.5451, -0.6433],
        [-0.4957,  1.2244],
        [-2.4287,  1.4357],
        [-0.4275, -1.3112]]) tensor([ 6.6368,  9.1104, 10.2296,  6.0645,  4.1182,  6.6116,  7.4623, -0.9639,
        -5.5491,  7.8042])


# 定义模型

In [9]:
class LinearNet(nn.Module):
    def __init__(self,n_feature):
        super(LinearNet,self).__init__()
        self.linear=nn.Linear(n_feature,1)
    def forward(self,x):
        y=self.linear(x)
        return y

net=LinearNet(num_inputs)
print(net)

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [27]:
#写法一
net=nn.Sequential(
    nn.Linear(num_inputs,5),
    nn.Linear(5,1)
    #还可写入其他层
)
#写法二
net=nn.Sequential()
net.add_module('linear',nn.Linear(num_inputs,5))
net.add_module('linear',nn.Linear(5,1))
#写法三
from collections import OrderedDict
net=nn.Sequential(OrderedDict([
    ('linear',nn.Linear(num_inputs,5)),
    ('linear2',nn.Linear(5,1))
    #……
]))
print(net)
print(net[0])

Sequential(
  (linear): Linear(in_features=2, out_features=5, bias=True)
  (linear2): Linear(in_features=5, out_features=1, bias=True)
)
Linear(in_features=2, out_features=5, bias=True)


In [28]:
#可以通过net.parameters()来查看模型所有的可学习参数，此函数将返回一个生成器。
for param in net.parameters():
    print(param)

Parameter containing:
tensor([[-0.6520,  0.2242],
        [-0.6648, -0.3602],
        [-0.2816, -0.1136],
        [-0.3556, -0.2805],
        [ 0.6093,  0.6340]], requires_grad=True)
Parameter containing:
tensor([ 0.1178,  0.4976,  0.5527, -0.3844,  0.3169], requires_grad=True)
Parameter containing:
tensor([[ 0.0460, -0.2100,  0.3646, -0.3202, -0.1397]], requires_grad=True)
Parameter containing:
tensor([0.2287], requires_grad=True)


# 初始化模型参数

In [29]:
from torch.nn import init
init.normal_(net[0].weight,mean=0,std=0.01)
init.constant_(net[0].bias,val=0) #也可以直接修改bias的data: net[0].bias.data.fill_(0)

Parameter containing:
tensor([0., 0., 0., 0., 0.], requires_grad=True)

# 自定义损失函数

In [30]:
loss = nn.MSELoss()

# 定义优化算法

In [31]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [32]:
#我们还可以为不同子网络设置不同的学习率，这在finetune时经常用到。例：
optimizer =optim.SGD([
                # 如果对某个参数不指定学习率，就使用最外层的默认学习率
                {'params': net[0].parameters()}, # lr=0.03
                {'params': net[1].parameters(), 'lr': 0.01}
            ], lr=0.03)

In [34]:
# 调整学习率
for param_group in optimizer.param_groups:
    param_group['lr'] *= 0.1 # 学习率为之前的0.1倍

# 训练模型

In [35]:
class LinearNet(nn.Module):
    def __init__(self,n_feature):
        super(LinearNet,self).__init__()
        self.linear=nn.Linear(n_feature,1)
    def forward(self,x):
        y=self.linear(x)
        return y

net=LinearNet(num_inputs)
print(net)

optimizer = optim.SGD(net.parameters(), lr=0.03)

num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)
epoch 1, loss: 0.000203
epoch 2, loss: 0.000060
epoch 3, loss: 0.000124


In [36]:
#下面我们分别比较学到的模型参数和真实的模型参数。我们从net获得需要的层，并访问其权重（weight）和偏差（bias）。学到的参数和真实的参数很接近。
dense = net.linear
print(true_w, dense.weight)
print(true_b, dense.bias)

[2, -3.4] Parameter containing:
tensor([[ 2.0002, -3.3999]], requires_grad=True)
4.2 Parameter containing:
tensor([4.1999], requires_grad=True)
