### :
    线性回归输出是一个连续值，因此适用于回归问题
    如预测房屋价格、气温、销售额等连续值的问题
    
    与回归问题不同，分类问题中模型的最终输出是一个离散值

### 线性回归基本要素:
    模型定义
    模型训练：训练数据、损失函数、优化算法
    模型预测

### 线性回归-从零开始实现：
    只利用Tensor和autograd来实现一个线性回归的训练

In [31]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import torch
import random
from IPython import display

In [32]:
%run d2lzh_pytorch.ipynb

#### 生成数据集

In [33]:
num_inputs = 2
num_examples = 1000
x = torch.randn(num_examples,num_inputs,dtype=torch.float32)
true_w = [2,-3.4]
true_b = 4.2
y = true_w[0] * x[:,0] + true_w[1] * x[:,1] + true_b
y += torch.tensor(np.random.normal(0,0.01,size=y.size()),dtype=torch.float32) # 噪声
# num_inputs = 2
# num_examples = 1000
# true_w = [2, -3.4]
# true_b = 4.2
# features = torch.randn(num_examples, num_inputs,
#                        dtype=torch.float32)
# labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
# labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
#                        dtype=torch.float32)

In [34]:
x[0],y[0]

(tensor([-0.0557, -2.0816]), tensor(11.1734))

#### 读取数据

In [35]:
def data_iter(batch_size,features,labels): # 相当于 DataLoader
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices) # # 样本的读取顺序是随机的
    for i in range(0,num_examples,batch_size):
        j = torch.LongTensor(indices[i : min(i + batch_size,num_examples)]) # 最后一次可能不足一个batch
        yield features.index_select(0, j), labels.index_select(0, j)

In [36]:
batch_size = 10
for x,y in data_iter(batch_size,x,y):
    print(x,y)
    break

tensor([[-0.9200,  1.4616],
        [-1.1124,  0.1067],
        [ 0.4409,  0.0234],
        [ 0.3691,  0.7310],
        [-0.5627,  0.4252],
        [-0.3944,  0.8510],
        [ 0.1661,  1.1447],
        [-0.7260, -0.9729],
        [ 0.9932,  0.4820],
        [ 0.6249,  0.7272]]) tensor([-2.6004,  1.6076,  4.9968,  2.4506,  1.6301,  0.5161,  0.6347,  6.0586,
         4.5370,  2.9787])


#### 初始化模型参数

In [37]:
# 将权重初始化成均值为0、标准差为0.01的正态随机数，偏差则初始化成0
w = torch.tensor(np.random.normal(0,0.01,(num_inputs,1)),dtype=torch.float32)
b = torch.zeros(1,dtype=torch.float32)

In [38]:
w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)

tensor([0.], requires_grad=True)

#### 定义模型

In [39]:
def linrg(x,w,b):
#     使用mm函数做矩阵乘法
#     return torch.mm(x,w) + b
    return x @ w + b

#### 定义损失函数
![mse_loss](./img/3.1/mse_loss.png)

In [40]:
# 平方损失来定义线性回归的损失函数
def squared_loss(y_predict,y):
    return ((y_predict - y.view(y_predict.size()))**2 / 2)

#### 定义优化算法

In [41]:
def sgd(params,lr,batch_size):
    for param in params:
        param.data -= param.grad * lr / batch_size

#### 训练模型

In [42]:
num_epochs = 3
lr = 0.03
loss_fn = squared_loss
optimizer = sgd
model = linrg
batch_size = 10
params = [w,b]

In [43]:
w,b

(tensor([[ 0.0046],
         [-0.0225]], requires_grad=True),
 tensor([0.], requires_grad=True))

In [44]:
for epoch in range(num_epochs):
    for x_1,y_1 in data_iter(batch_size,x,y):
        l = loss_fn(model(x_1,w,b),y_1).sum()
#         if params is not None and params[0].grad is not None:
#             for param in params:
#                 param.grad.data.zero_()

        l.backward()
        sgd([w,b],lr,batch_size)
        
        w.grad.data.zero_()
        b.grad.data.zero_()
        
    train_l = loss_fn(model(x,w,b),y)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

epoch 1, loss 5.322900
epoch 2, loss 5.168851
epoch 3, loss 5.022611


In [45]:
print(true_w, '\n', w)
print(true_b, '\n', b)

[2, -3.4] 
 tensor([[ 0.0465],
        [-0.0366]], requires_grad=True)
4.2 
 tensor([0.2005], requires_grad=True)


### 线性回归的简洁实现:
    torch.utils.data模块提供了有关数据处理的工具
    torch.nn模块定义了大量神经网络的层
    torch.nn.init模块定义了各种初始化方法
    torch.optim模块提供了很多常用的优化算法

In [46]:
from torch.utils.data import DataLoader
from torch.nn import init,Module,Linear

#### 读取数据

In [47]:
dataset = torch.utils.data.TensorDataset(x,y)
data_iter_1 = DataLoader(dataset,batch_size=10,shuffle=True)

In [48]:
for x_2,y_2 in data_iter_1:
    print(x_2,y_2)
    break

tensor([[-1.1124,  0.1067],
        [-0.3944,  0.8510],
        [-0.5627,  0.4252],
        [ 0.9932,  0.4820],
        [-0.7260, -0.9729],
        [ 0.4409,  0.0234],
        [ 0.6249,  0.7272],
        [-0.9200,  1.4616],
        [ 0.3691,  0.7310],
        [ 0.1661,  1.1447]]) tensor([ 1.6076,  0.5161,  1.6301,  4.5370,  6.0586,  4.9968,  2.9787, -2.6004,
         2.4506,  0.6347])


#### 定义模型

In [49]:
class LinearNet(torch.nn.Module):
    def __init__(self,n_feature):
        super(LinearNet,self).__init__()
        self.linear = Linear(n_feature,1)
        
    def forward(self,x):
        out = self.linear(x)
        return out

In [50]:
# 还可以用nn.Sequential来更加方便地搭建网络
# model = torch.nn.Sequential(
#     torch.nn.Linear(num_inputs,1),
# )
model = LinearNet(num_inputs)
model

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)

In [51]:
# 通过 model.parameters() 来查看模型所有的可学习参数，此函数将返回一个生成器
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[-0.0659,  0.4888]], requires_grad=True)
Parameter containing:
tensor([0.1405], requires_grad=True)


#### 初始化模型参数

In [52]:
init.normal_(model.linear.weight,mean=0,std=0.01)
init.constant_(model.linear.bias,val=0) # constant_ : 使用val的值来填充输入的Tensor

Parameter containing:
tensor([0.], requires_grad=True)

In [53]:
model.linear.weight,model.linear.bias

(Parameter containing:
 tensor([[-0.0107,  0.0083]], requires_grad=True),
 Parameter containing:
 tensor([0.], requires_grad=True))

#### 定义损失函数

In [54]:
loss_fn = torch.nn.MSELoss()

#### 定义优化算法

In [55]:
optimizer = torch.optim.SGD(model.parameters(),lr=0.03)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    foreach: None
    lr: 0.03
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [56]:
# 为不同子网络设置不同的学习率
# optimizer =optim.SGD([
#                 # 如果对某个参数不指定学习率，就使用最外层的默认学习率
#                 {'params': model.subnet1.parameters()}, # lr=0.03
#                 {'params': model.subnet2.parameters(), 'lr': 0.01}
#             ], lr=0.03)

#### 训练模型

In [57]:
for epoch in range(num_epochs):
    for x_3,y_3 in data_iter_1:
        y_pre = model(x_3)
        loss = loss_fn(y_pre,y_3.view(-1,1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    l = loss_fn(model(x),y.view(-1,1))
    print('epoch %d, loss: %f' % (epoch, l.mean().item()))

epoch 0, loss: 10.359767
epoch 1, loss: 9.789368
epoch 2, loss: 9.273485


In [58]:
# 从 model 获得需要的层，并访问其权重（weight）和偏差（bias）
dense = model.linear

In [59]:
print(true_w,dense.weight)
print(true_b,dense.bias)

[2, -3.4] Parameter containing:
tensor([[ 0.0744, -0.0288]], requires_grad=True)
4.2 Parameter containing:
tensor([0.3870], requires_grad=True)
