In [1]:
%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

In [2]:
## 生成数据集
true_weight = [2, -3]
true_bias = 1.8

features = torch.randn(1000, 2, dtype=torch.float32)
labels = torch.mm(features, torch.tensor(true_weight, dtype=torch.float32).view((2,1))) + true_bias 
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.shape))
features.shape, labels.shape

(torch.Size([1000, 2]), torch.Size([1000, 1]))

In [3]:
## 读取数据, 建立 batch_iter

batch_size = 10
data_set = torch.utils.data.TensorDataset(features, labels)
data_iter = torch.utils.data.DataLoader(data_set, batch_size, shuffle=True)

In [4]:
for i in data_iter:
    print(i)
    break

[tensor([[ 0.1800,  0.3207],
        [-1.4036,  1.2887],
        [-0.2297, -1.0200],
        [-0.5457,  0.3420],
        [ 1.2325,  0.1081],
        [ 0.5801,  0.0608],
        [-1.4980,  1.3699],
        [-0.2110,  1.1738],
        [ 0.1474,  1.1944],
        [-0.9814,  0.6353]]), tensor([[ 1.1970],
        [-4.8788],
        [ 4.4226],
        [-0.3077],
        [ 3.9482],
        [ 2.7760],
        [-5.3029],
        [-2.1424],
        [-1.4962],
        [-2.0764]])]


In [5]:
w = torch.tensor(np.random.normal(0, 0.1, size=(2,1)), dtype=torch.float32, requires_grad=True)
b = torch.zeros(1, dtype=torch.float32, requires_grad=True)

class LinearNet(torch.nn.Module):
    def __init__(self):
        super(LinearNet, self).__init__()
        self.linear = torch.nn.Linear(in_features=2, out_features=1, bias=True)
        torch.nn.init.normal_(self.linear.weight, 0., 0.1)
        torch.nn.init.constant_(self.linear.bias, 0.)
    
    def forward(self, x):
        return self.linear(x)

net = LinearNet()
print(net)

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [6]:
#torch.nn.Module??

In [7]:
## 查看参数
for param in net.parameters():
    print(param)
    
## 初始化参数
# torch.nn.init.normal_(net.linear.weight, 0., 0.1)
# torch.nn.init.constant_(net.linear.bias, 0.)

Parameter containing:
tensor([[-0.0609, -0.0697]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)


In [8]:
loss = torch.nn.MSELoss()
print(loss)

optimizer = torch.optim.SGD(net.parameters(), lr=0.03, )
print(optimizer)

# optimizer =optim.SGD([
#     # 如果对某个参数不指定学习率，就使用最外层的默认学习率
#     {'params': net.subnet1.parameters()}, # lr=0.03
#     {'params': net.subnet2.parameters(), 'lr': 0.01}
# ], lr=0.03)

MSELoss()
SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)


In [9]:
num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        optimizer.zero_grad() # 梯度清零，等价于net.zero_grad()
        output = net(X)
        l = loss(output, y.view(output.shape))
        l.backward()
        optimizer.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))

epoch 1, loss: 0.000158
epoch 2, loss: 0.000138
epoch 3, loss: 0.000061


In [10]:
print(true_weight, net.linear.weight)
print(true_bias, net.linear.bias)

[2, -3] Parameter containing:
tensor([[ 1.9997, -3.0000]], requires_grad=True)
1.8 Parameter containing:
tensor([1.7993], requires_grad=True)


**with regularization**

In [11]:
net = LinearNet() # initiallized
print(net)

optimizer_w = torch.optim.SGD([net.linear.weight], lr=0.03, weight_decay=0.1)
optimizer_b = torch.optim.SGD([net.linear.bias], lr=0.03)

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)


In [12]:
num_epochs = 10
optimizers = [optimizer_w, optimizer_b]
for epoch in range(1, num_epochs+1):
    for X, y in data_iter:
        for opti in optimizers:
            opti.zero_grad()
        y_hat = net(X)
        l = loss(y_hat, y.view(y_hat.shape))
        l.backward()
        
        for opti in optimizers:
            opti.step()
    print('epoch %d, loss: %f' % (epoch, l.item()))    

epoch 1, loss: 0.017943
epoch 2, loss: 0.023046
epoch 3, loss: 0.021141
epoch 4, loss: 0.033864
epoch 5, loss: 0.033869
epoch 6, loss: 0.022412
epoch 7, loss: 0.055030
epoch 8, loss: 0.021079
epoch 9, loss: 0.020980
epoch 10, loss: 0.034851


In [13]:
print(true_weight, net.linear.weight)
print(true_bias, net.linear.bias)

[2, -3] Parameter containing:
tensor([[ 1.9326, -2.8590]], requires_grad=True)
1.8 Parameter containing:
tensor([1.8000], requires_grad=True)
