# 单层线性神经网络

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

torch.set_printoptions(edgeitems=2, threshold=50)

## 处理输入数据，分割数据集，

In [2]:
x = [35.7, 55.9, 58.2, 81.9, 56.3, 48.9, 33.9, 21.8, 48.4, 60.4, 68.4]
y = [0.5, 14.0, 15.0, 28.0, 11.0, 8.0, 3.0, -4.0, 6.0, 13.0, 21.0]

In [3]:
x = torch.tensor(x).unsqueeze(1)
y = torch.tensor(y).unsqueeze(1)
x, x.shape, y, y.shape

(tensor([[35.7000],
         [55.9000],
         [58.2000],
         [81.9000],
         [56.3000],
         [48.9000],
         [33.9000],
         [21.8000],
         [48.4000],
         [60.4000],
         [68.4000]]),
 torch.Size([11, 1]),
 tensor([[ 0.5000],
         [14.0000],
         [15.0000],
         [28.0000],
         [11.0000],
         [ 8.0000],
         [ 3.0000],
         [-4.0000],
         [ 6.0000],
         [13.0000],
         [21.0000]]),
 torch.Size([11, 1]))

In [4]:
n_samples = x.shape[0]
n_val = int(0.2 * n_samples)

In [5]:
shuffled_indices = torch.randperm(n_samples)

In [6]:
train_indices = shuffled_indices[:-n_val]
val_indices = shuffled_indices[-n_val:]
train_indices, val_indices

(tensor([ 3,  2,  0, 10,  7,  8,  9,  6,  5]), tensor([4, 1]))

In [10]:
train_x = x[train_indices]
train_y = y[train_indices]
val_x = x[val_indices]
val_y = y[val_indices]

train_x_scaled = 0.1 * train_x
val_x_scaled = 0.1 * val_x

In [12]:
train_x_scaled, val_x_scaled

(tensor([[8.1900],
         [5.8200],
         [3.5700],
         [6.8400],
         [2.1800],
         [4.8400],
         [6.0400],
         [3.3900],
         [4.8900]]),
 tensor([[5.6300],
         [5.5900]]))

## 构建线性层

In [20]:
linear_model = nn.Linear(1, 1)  # input size, output size
linear_model(val_x_scaled)

tensor([[4.0013],
        [3.9792]], grad_fn=<AddmmBackward0>)

In [21]:
linear_model.weight, linear_model.bias

(Parameter containing:
 tensor([[0.5517]], requires_grad=True),
 Parameter containing:
 tensor([0.8951], requires_grad=True))

In [24]:
a = torch.ones(1)
a, a.shape, a[0]

(tensor([1.]), torch.Size([1]), tensor(1.))

In [26]:
b = torch.ones(())
b, b.shape
# b[0]  # error

(tensor(1.), torch.Size([]))

b的尺寸为0D，无法作为网络输入

In [27]:
linear_model(a)
# linear_model(b) # error: both arguments to linear need to be at least 1D, but they are 0D and 2D

tensor([1.4468], grad_fn=<ViewBackward0>)

In [None]:
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

In [29]:
# 直接调用linear_model的parameters()方法，返回的是一个生成器，而不是一个列表
list(linear_model.parameters())

[Parameter containing:
 tensor([[0.5517]], requires_grad=True),
 Parameter containing:
 tensor([0.8951], requires_grad=True)]

In [30]:
def loss_fn(y_pred, y):
    squared_diffs = (y_pred - y) ** 2
    return squared_diffs.mean()

In [31]:
def training_loop(n_epochs, optimizer, model, loss_fn, train_x, val_x, train_y, val_y):
    for epoch in range(1, n_epochs + 1):
        train_y_pred = model(train_x)
        train_loss = loss_fn(train_y_pred, train_y)

        val_y_pred = model(val_x)
        val_loss = loss_fn(val_y_pred, val_y)

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        if epoch == 1 or epoch % 500 == 0:
            print(
                f"Epoch {epoch}, Training loss {train_loss.item():.4f}, Validation loss {val_loss.item():.4f}"
            )

In [32]:
linear_model = nn.Linear(1, 1)
optimizer = optim.SGD(linear_model.parameters(), lr=1e-2)

In [33]:
training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=loss_fn,
    train_x=train_x_scaled,
    val_x=val_x_scaled,
    train_y=train_y,
    val_y=val_y,
)

Epoch 1, Training loss 210.7022, Validation loss 172.3331
Epoch 500, Training loss 7.6159, Validation loss 2.7195
Epoch 1000, Training loss 3.5584, Validation loss 2.7262
Epoch 1500, Training loss 3.0504, Validation loss 2.7297
Epoch 2000, Training loss 2.9868, Validation loss 2.7311
Epoch 2500, Training loss 2.9789, Validation loss 2.7316
Epoch 3000, Training loss 2.9779, Validation loss 2.7318


In [34]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[5.3752]], requires_grad=True),
 Parameter containing:
 tensor([-17.2707], requires_grad=True)]

下面使用nn库自带的MSELoss()

In [35]:
training_loop(
    n_epochs=3000,
    optimizer=optimizer,
    model=linear_model,
    loss_fn=nn.MSELoss(),
    train_x=train_x_scaled,
    val_x=val_x_scaled,
    train_y=train_y,
    val_y=val_y,
)

Epoch 1, Training loss 2.9779, Validation loss 2.7318
Epoch 500, Training loss 2.9777, Validation loss 2.7318
Epoch 1000, Training loss 2.9777, Validation loss 2.7319
Epoch 1500, Training loss 2.9777, Validation loss 2.7319
Epoch 2000, Training loss 2.9777, Validation loss 2.7319
Epoch 2500, Training loss 2.9777, Validation loss 2.7319
Epoch 3000, Training loss 2.9777, Validation loss 2.7319


In [36]:
list(linear_model.parameters())

[Parameter containing:
 tensor([[5.3816]], requires_grad=True),
 Parameter containing:
 tensor([-17.3066], requires_grad=True)]