## 多项式函数拟合实验
本实验旨在通过多项式函数拟合，说明正常拟合、过拟合与欠拟合三种情况。

In [62]:
import torch
import numpy as np
import sys

### 生成数据集

In [63]:
# Fixed seed so that Restart Kernel -> Run All regenerates the same dataset
# (and, because it seeds torch's global RNG, the same shuffling/initialisation
# in later cells when the notebook is run top to bottom).
SEED = 0
torch.manual_seed(SEED)

n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5

# One scalar input x per sample, drawn from a standard normal distribution.
features = torch.randn((n_train + n_test, 1))
# Each row is (x, x^2, x^3); all three columns are fed to the model as inputs.
poly_features = torch.cat((features, torch.pow(features, 2), torch.pow(features, 3)), 1)

# y = 1.2*x - 3.4*x^2 + 5.6*x^3 + 5, evaluated as a single matrix-vector product.
labels = poly_features @ torch.tensor(true_w) + true_b
# Additive Gaussian noise with standard deviation 0.01, drawn from the same
# seeded torch RNG instead of a separate (unseeded) numpy generator.
labels += 0.01 * torch.randn(labels.shape)

### 定义损失函数、优化器与训练函数

In [64]:
num_epochs = 100  # default number of passes over the training set
loss = torch.nn.MSELoss()


def train(train_features, test_features, train_labels, test_labels,
          epochs=None, batch_size=10, lr=0.01):
    """Fit a single linear layer with mini-batch SGD and report its losses.

    A fresh ``torch.nn.Linear(train_features.shape[-1], 1)`` is trained on the
    training data with the module-level ``loss`` (MSE). Afterwards the loss on
    the full training set and on the full test set is printed together with
    the true and the learned parameters.

    Args:
        train_features: training inputs; the size of the last dimension is
            used as the number of input features of the linear layer.
        test_features: test inputs with the same number of features.
        train_labels: one target per training sample (reshaped to a column
            internally).
        test_labels: one target per test sample (reshaped to a column
            internally).
        epochs: number of passes over the training set. ``None`` (default)
            uses the current value of the module-level ``num_epochs``.
        batch_size: mini-batch size for the shuffled data loader.
        lr: SGD learning rate.

    Returns:
        A tuple ``(train_loss, test_loss)`` of Python floats.
    """
    if epochs is None:
        # Looked up at call time so that changing the global still takes effect.
        epochs = num_epochs

    dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
    net = torch.nn.Linear(train_features.shape[-1], 1)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)

    for _ in range(epochs):
        for X, y in train_iter:
            # Labels are 1-D; reshape to a column to match the layer's output.
            l = loss(net(X), y.view(-1, 1))
            optimizer.zero_grad()
            l.backward()
            optimizer.step()

    # Evaluation only: no autograd graph is needed for the final losses.
    with torch.no_grad():
        train_loss = loss(net(train_features), train_labels.view(-1, 1)).item()
        test_loss = loss(net(test_features), test_labels.view(-1, 1)).item()

    print('train loss', train_loss, '\ntest loss', test_loss)
    print('\ntrue_w',true_w, '\ntrue_b',true_b)
    print('\nout_w:',net.weight.data, '\nout_b',net.bias.data)

    return train_loss, test_loss


### 正常拟合
训练集与测试集的样本数之比为 1:1（各 100 个样本），使用全部三个多项式特征。

In [65]:
# Even split: the first n_train rows are used for training, the rest for testing.
# All three polynomial columns (x, x^2, x^3) are kept as inputs.
train_features, test_features = poly_features[:n_train], poly_features[n_train:]
train_labels, test_labels = labels[:n_train], labels[n_train:]
train(train_features, test_features, train_labels, test_labels)

train loss 0.00011122498835902661 
test loss 0.000156621536007151

true_w [1.2, -3.4, 5.6] 
true_b 5

out_w: tensor([[ 1.2110, -3.3986,  5.5975]]) 
out_b tensor([4.9974])


### 过拟合
训练样本过少（此处仅提供 10 个训练样本，即一个 batch 的大小），其余样本全部作为测试集。

In [66]:
# Only the first 10 rows are used for training; every remaining row is
# held out for testing.
train_features, test_features = poly_features[:10], poly_features[10:]
train_labels, test_labels = labels[:10], labels[10:]
train(train_features, test_features, train_labels, test_labels)

train loss 2.593590497970581 
test loss 4.325687408447266

true_w [1.2, -3.4, 5.6] 
true_b 5

out_w: tensor([[ 1.9771, -2.3236,  5.2276]]) 
out_b tensor([2.6715])


### 欠拟合
使用线性模型（仅以一次项 $x$ 作为输入特征）拟合非线性的三次多项式函数。

In [67]:
# Keep only the first-order column x as the input feature (the same values as
# the rows of `features`).
# Indexing with `[:n_train, 0]` would give a 1-D tensor of size [n_train];
# slicing the column with `:1` keeps the result 2-D, size [n_train, 1], which
# is the shape the linear layer is applied to.
train_features, test_features = poly_features[:n_train, :1], poly_features[n_train:, :1]
train_labels, test_labels = labels[:n_train], labels[n_train:]
train(train_features, test_features, train_labels, test_labels)

train loss 103.31962585449219 
test loss 285.002197265625

true_w [1.2, -3.4, 5.6] 
true_b 5

out_w: tensor([[17.4956]]) 
out_b tensor([-0.0892])
