In [48]:
# https://blog.csdn.net/out_of_memory_error/article/details/81266231
"""
from itertools import count
x = count(0, 1)
next(x)
"""
from itertools import count
import torch
import torch.autograd
import torch.nn.functional as F
 
POLY_DEGREE = 3

![title](poly.PNG)
在PyTorch里面使用torch.cat()函数来实现Tensor的拼接：
对于输入的n个数据，我们将其扩展成上面矩阵所示的样子。

In [49]:
def make_features(x):
    """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4]."""
    x = x.unsqueeze(1)
    return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], dim=1)

然后定义出我们需要拟合的多项式，可以随机抽取一个多项式来作为我们的目标多项式。当然，系数w和偏置b确定了，多项式也就确定了：

In [65]:
W_target = torch.randn(POLY_DEGREE, 1)
b_target = torch.randn(1)
print(W_target.shape)
def f(x):
    """Approximated function."""
    return x.mm(W_target) + b_target.item()

"""
x = torch.tensor([[1., -1.], [1., -1.]])
y = torch.tensor([[2, 2], [2, 2]], dtype=torch.float32)
print(x.mm(y))
"""

torch.Size([3, 1])


'\nx = torch.tensor([[1., -1.], [1., -1.]])\ny = torch.tensor([[2, 2], [2, 2]], dtype=torch.float32)\nprint(x.mm(y))\n'

这里的权重已经定义好了，x.mm(W_target)表示做矩阵乘法，就是每次输入一个得到一个的真实函数。

在训练的时候我们需要采样一些点，可以随机生成一批数据来得到训练集。下面的函数可以让我们每次取batch_size这么多个数据，然后将其转化为矩阵形式，再把这个值通过函数之后的结果也返回作为真实的输出值：


In [66]:
def get_batch(batch_size=32):
    """Builds a batch i.e. (x, f(x)) pair."""
    random = torch.randn(batch_size)
    x = make_features(random)
    y = f(x)
    return x, y

接下来我们需要定义模型，这里采用一种简写的方式定义模型，torch.nn.Linear()表示定义一个线性模型，这里定义了是输入值和目标参数w的行数一致（和POLY_DEGREE一致，本次实验中为3），输出值为1的模型。

In [80]:
# Define model
fc = torch.nn.Linear(W_target.size(0), 1)  # w1*x1 + w2*x2 + w3*x3 + b = y 一顆neural就是一個線性迴歸

In [81]:
# 下面开始训练模型，训练的过程让其不断优化，直到随机取出的batch_size个点中计算出来的均方误差小于0.001为止。
for batch_idx in count(1):
    # Get data
    batch_x, batch_y = get_batch()

    # Reset gradients
    fc.zero_grad()
    
    # Forward pass
    output = F.smooth_l1_loss(fc(batch_x), batch_y)
    loss = output.item()  # Use torch.Tensor.item() to get a Python number from a tensor containing a single value:

    # Backward pass
    output.backward()
    
    # Apply gradients (no optimizer)
    for param in fc.parameters():
        param.data.add_(-0.1 * param.grad.data)

    # Stop criterion
    if loss < 1e-3:
        break

In [82]:

def poly_desc(W, b):
    """Creates a string description of a polynomial."""
    result = 'y = '
    for i, w in enumerate(W):
        result += '{:+.2f} x^{} '.format(w, len(W) - i)
    result += '{:+.2f}'.format(b[0])
    return result
 
print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
print('==> Learned function:\t' + poly_desc(fc.weight.view(-1), fc.bias))
print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target))


Loss: 0.000957 after 70 batches
==> Learned function:	y = +0.47 x^3 -1.38 x^2 -0.58 x^1 +1.03
==> Actual function:	y = +0.48 x^3 -1.39 x^2 -0.59 x^1 +1.08


In [84]:
print(fc.weight.view(-1))
print(fc.bias)

tensor([ 0.4744, -1.3772, -0.5847], grad_fn=<ViewBackward>)
Parameter containing:
tensor([1.0346], requires_grad=True)


In [90]:
fc.weight.view((3, 1)) # reshape

tensor([[ 0.4744],
        [-1.3772],
        [-0.5847]], grad_fn=<ViewBackward>)