In [1]:
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

In [2]:
## 生成数据集
true_weight = [2, -3]
true_bias = 1.8

features = torch.randn(1000, 2, dtype=torch.float32)
labels = torch.mm(features, torch.tensor(true_weight, dtype=torch.float32).view((2,1))) + true_bias 
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.shape))
features.shape, labels.shape

(torch.Size([1000, 2]), torch.Size([1000, 1]))

In [3]:
## 读取数据, 建立 batch_iter

batch_size = 10
data_set = torch.utils.data.TensorDataset(features, labels)
data_iter = torch.utils.data.DataLoader(data_set, batch_size, shuffle=True)

In [4]:
for i in data_iter:
    print(i)
    break

[tensor([[-0.9908, -0.6345],
        [ 0.2578, -2.0135],
        [ 0.7863, -0.6099],
        [ 0.2873,  2.3473],
        [ 0.1547,  0.6940],
        [-2.0706,  1.3718],
        [-0.9905, -0.9074],
        [-0.6247,  0.2997],
        [ 0.8911, -0.2604],
        [ 0.2079,  1.6563]]), tensor([[ 1.7171],
        [ 8.3489],
        [ 5.2116],
        [-4.6605],
        [ 0.0175],
        [-6.4694],
        [ 2.5425],
        [-0.3490],
        [ 4.3507],
        [-2.7497]])]


In [5]:
w = torch.tensor(np.random.normal(0, 0.1, size=(2,1)), dtype=torch.float32, requires_grad=True)
b = torch.zeros(1, dtype=torch.float32, requires_grad=True)

class LinearNet(torch.nn.Module):
    def __init__(self):
        super(LinearNet, self).__init__()
        self.linear = torch.nn.Linear(in_features=2, out_features=1, bias=True)
        torch.nn.init.normal_(self.linear.weight, 0., 0.1)
        torch.nn.init.constant_(self.linear.bias, 0.)
    
    def forward(self, x):
        return self.linear(x)

net = LinearNet()
print(net)

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [6]:
#torch.nn.Module??

In [7]:
## 查看参数
for param in net.parameters():
    print(param)
    
## 初始化参数
# torch.nn.init.normal_(net.linear.weight, 0., 0.1)
# torch.nn.init.constant_(net.linear.bias, 0.)

Parameter containing:
tensor([[ 0.1654, -0.1335]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


In [8]:
loss = torch.nn.MSELoss()
print(loss)

optimizer = torch.optim.SGD(net.parameters(), lr=0.03, )
print(optimizer)

# optimizer =optim.SGD([
#     # 如果对某个参数不指定学习率，就使用最外层的默认学习率
#     {'params': net.subnet1.parameters()}, # lr=0.03
#     {'params': net.subnet2.parameters(), 'lr': 0.01}
# ], lr=0.03)

MSELoss()
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [13]:
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()
        output = net(X)
        l = loss(output, y.view(output.shape))
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))

epoch 1, loss: 0.000040
epoch 2, loss: 0.000095
epoch 3, loss: 0.000065


In [14]:
print(true_weight, net.linear.weight)
print(true_bias, net.linear.bias)

[2, -3] Parameter containing:
tensor([[ 1.9998, -3.0002]], requires_grad=True)
1.8 Parameter containing:
tensor([1.7998], requires_grad=True)


**with regularization**

In [40]:
net = LinearNet() # initiallized
print(net)

optimizer_w = torch.optim.SGD([net.linear.weight], lr=0.03, weight_decay=0.1)
optimizer_b = torch.optim.SGD([net.linear.bias], lr=0.03)

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [41]:
num_epochs = 10
optimizers = [optimizer_w, optimizer_b]
for epoch in range(1, num_epochs+1):
    for X, y in data_iter:
        for opti in optimizers:
            opti.zero_grad()
        y_hat = net(X)
        l = loss(y_hat, y.view(y_hat.shape))
        l.backward()
        
        for opti in optimizers:
            opti.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))    

epoch 1, loss: 0.046976
epoch 2, loss: 0.016501
epoch 3, loss: 0.027449
epoch 4, loss: 0.040533
epoch 5, loss: 0.028795
epoch 6, loss: 0.029837
epoch 7, loss: 0.041452
epoch 8, loss: 0.012664
epoch 9, loss: 0.058979
epoch 10, loss: 0.014121


In [42]:
print(true_weight, net.linear.weight)
print(true_bias, net.linear.bias)

[2, -3] Parameter containing:
tensor([[ 1.8984, -2.8681]], requires_grad=True)
1.8 Parameter containing:
tensor([1.8066], requires_grad=True)
