In [2]:
from torchvision import datasets, transforms
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
%matplotlib inline

# 构建数据集

对于小数据集, 可以自己把标签和图像一一对应, 直接构建, 因为我们只需要__len__()和__getitem__()就可以处理.

In [4]:
# 下载
data_path = './data-unversioned/'
transformed_cifar10 = datasets.CIFAR10(
    data_path, train=True, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                             std=(0.2470, 0.2435, 0.2616))
    ]))
transformed_cifar10_val = datasets.CIFAR10(
    data_path, train=False, download=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                             std=(0.2470, 0.2435, 0.2616))
    ]))

label_map = {0: 0, 2: 1}  # dictionary
class_names = ['airplane', 'bird']
tr_cifar2 = [(img, label_map[label])
             for img, label in transformed_cifar10 if label in [0, 2]]
tr_cifar2_val = [(img, label_map[label])
                 for img, label in transformed_cifar10_val if label in [0, 2]]

# 构建全连接神经网络模型

对于本例, 需要的输出是[P(plane), P(bird)], 要求概率在0-1且二者和为1. 可以运用Softmax函数来把两个概率转化成这样的输出.

softmax(x1, x2) = (e^x1 / (e^x1 + e^x2), e^x2 / (e^x1 + e^x2)). 可用nn.Softmax(dim=...)调用.

In [None]:
# def softmax(x: torch.Tensor):
#     return torch.exp(x) / torch.exp(x).sum()

In [3]:
model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.Softmax(dim=1)
)
# let's try indentify one image at first
img, _ = tr_cifar2[0]
img_batch = img.reshape(-1).unsqueeze(0)
out = model(img_batch)
out

tensor([[0.5685, 0.4315]], grad_fn=<SoftmaxBackward0>)

In [None]:
# model = nn.Linear(2, 3)
# input = torch.ones(5, 2)
# model(input), model[0].weight, model[0].bias

# 损失和反向传播

假设输出为[a, b], 我们能通过 a<b 或 a>b 来判断预测类别.

nn.NLLLoss类接受对数概率输入, 损失与预测类别正确概率呈负相关.

In [7]:
# 因此我们要把模型中的 nn.Softmax 改为 nn.LogSoftmax.
# 注: LogSoftmax() + NLLLoss() = CrossEntropyLoss()
model = nn.Sequential(
    nn.Linear(3072, 512),
    nn.Tanh(),
    nn.Linear(512, 2),
    nn.LogSoftmax(dim=1))
loss_fn = nn.NLLLoss()

DataLoader类可以帮我们打乱数据, 采样小批量.

In [5]:
batch_size = 64
train_loader = torch.utils.data.DataLoader(tr_cifar2, batch_size, shuffle=True)

In [9]:
learning_rate = 1e-2
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

n_epochs = 100

for epoch in range(1, n_epochs+1):
    for imgs, labels in train_loader:    # directly pick imgs and labels from DataLoader!
        batch_size = imgs.shape[0]
        outputs = model(imgs.reshape(batch_size, -1))
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if epoch <= 3 or epoch % 10 == 0:
        print(f'Epoch: {epoch}, Loss: {float(loss)}')

Epoch: 1, Loss: 0.6322190761566162
Epoch: 2, Loss: 0.37605032324790955
Epoch: 3, Loss: 0.43016284704208374
Epoch: 4, Loss: 0.4807472825050354
Epoch: 5, Loss: 0.4246319532394409
Epoch: 6, Loss: 0.24218589067459106
Epoch: 7, Loss: 0.3127555847167969
Epoch: 8, Loss: 0.3447917401790619
Epoch: 9, Loss: 0.34773409366607666
Epoch: 10, Loss: 0.42815715074539185
Epoch: 11, Loss: 0.4933187663555145
Epoch: 12, Loss: 0.07119683921337128
Epoch: 13, Loss: 0.28145739436149597
Epoch: 14, Loss: 0.5645956993103027
Epoch: 15, Loss: 0.37036722898483276
Epoch: 16, Loss: 0.24130171537399292
Epoch: 17, Loss: 0.10272957384586334
Epoch: 18, Loss: 0.18275919556617737
Epoch: 19, Loss: 0.32651498913764954
Epoch: 20, Loss: 0.26537227630615234
Epoch: 21, Loss: 0.3423563838005066
Epoch: 22, Loss: 0.3817196190357208
Epoch: 23, Loss: 0.27346479892730713
Epoch: 24, Loss: 0.30845358967781067
Epoch: 25, Loss: 0.38492706418037415
Epoch: 26, Loss: 0.1900109499692917
Epoch: 27, Loss: 0.4471464455127716
Epoch: 28, Loss: 0.54

In [15]:
val_loader = torch.utils.data.DataLoader(tr_cifar2_val, batch_size=64, shuffle=False)
correct, total = 0, 0

with torch.no_grad():
    for imgs, labels in val_loader:
        batch_size = imgs.shape[0]
        outputs = model(imgs.reshape(batch_size, -1))
        _, predicted = torch.max(outputs, dim=1)
        total += labels.shape[0]
        correct += int((predicted == labels).sum())

print(f'Validate accuracy: {correct / total * 100} %')

Validate accuracy: 81.35 %


In [26]:
# Count how many parameters are in the model.
numel_list = [p.numel() for p in model.parameters() if p.requires_grad == True]
sum(numel_list), numel_list

(1574402, [1572864, 512, 1024, 2])