In [1]:
import torch
from torch import nn
import torchvision
from torchvision import transforms
from torch.utils import data

In [17]:
def load_data_fashion_mnist(batch_size, resize=None, num_workers=4, root="data"):
    """Build training and test DataLoaders for Fashion-MNIST.

    The dataset is requested with ``download=True`` for both splits and every
    image is passed through ``ToTensor`` (preceded by ``Resize`` when
    ``resize`` is given).

    Args:
        batch_size: Number of examples per mini-batch for both loaders.
        resize: Optional target size handed to ``transforms.Resize``; a falsy
            value (the default ``None``) disables resizing.
        num_workers: Number of worker processes used by each DataLoader.
            Defaults to 4, the value that used to be hard-coded; pass 0 to
            load data in the main process.
        root: Directory used for the dataset files. Defaults to ``"data"``,
            the value that used to be hard-coded.

    Returns:
        A ``(train_loader, test_loader)`` tuple. Only the training loader
        shuffles its data.
    """
    steps = [transforms.ToTensor()]
    if resize:
        # Inserted at the front so the resize is applied before ToTensor.
        steps.insert(0, transforms.Resize(resize))
    pipeline = transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=pipeline, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=pipeline, download=True)

    train_loader = data.DataLoader(mnist_train, batch_size, shuffle=True,
                                   num_workers=num_workers)
    test_loader = data.DataLoader(mnist_test, batch_size, shuffle=False,
                                  num_workers=num_workers)
    return train_loader, test_loader

In [18]:
# Training and test loaders, 256 examples per mini-batch, no resizing.
train_iter, test_iter = load_data_fashion_mnist(256)

In [19]:
# Define the network.
# PyTorch does not implicitly reshape its inputs, so a flatten layer is placed
# in front of the linear layer to adjust the shape of the network input.
# For example, an input batch of shape (256, 1, 28, 28) is turned into
# (256, 784) by nn.Flatten.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=10),
)


# Initialise the model parameters
def init_weights(m):
    """Initialise the weights of a fully connected layer in place.

    Written to be passed to ``Module.apply``. Modules that are not an
    ``nn.Linear`` (or a subclass of it) are left untouched, and the bias of a
    linear layer is not modified here.

    Args:
        m: The module to (possibly) initialise.
    """
    # isinstance instead of an exact type comparison, so that subclasses of
    # nn.Linear receive the same initialisation instead of being skipped.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)


# Run init_weights over the model. This is the last expression of the cell,
# so the module it returns is echoed as the cell output.
net.apply(init_weights)

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=10, bias=True)
)

In [20]:
# Loss function
#
# Numerical-stability notes:
# - Overflow: softmax exponentiates its inputs. If some of them are very
#   large, exp() can exceed the largest number the data type can hold, so the
#   numerator or denominator becomes inf and the result ends up as 0, inf or
#   nan. In that case the cross-entropy has no well-defined value.
# - Underflow: subtracting the maximum from every input avoids the overflow,
#   but can make some exp() results round to 0; taking log of that for the
#   cross-entropy then tends to -inf.
# - Principle of the remedy: see p. 127 of the reference these notes follow.
#   Softmax and log are combined analytically so that no exp() result is fed
#   through log().
# - In code: nn.CrossEntropyLoss is given the raw (unnormalised) network
#   outputs and applies that combined computation internally.
#
# reduction='none' is unrelated to stability: it keeps one loss value per
# example, and the caller reduces it with .mean() / .sum().
loss = nn.CrossEntropyLoss(reduction='none')

In [21]:
# Optimiser: mini-batch stochastic gradient descent over all parameters of
# `net`, with learning rate 0.1.
trainer = torch.optim.SGD(params=net.parameters(), lr=0.1)

In [23]:
# Training
# Number of epochs: train_ch3 runs one training pass plus one evaluation pass
# per epoch.
num_epochs = 10


class Accumulator:
    """Running float totals over ``n`` slots.

    The totals live in ``self.data`` and can be read by index.
    """

    def __init__(self, n):
        """Create ``n`` slots, each starting at 0.0."""
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add the i-th argument (converted with ``float``) to the i-th slot.

        Slots and arguments are paired with ``zip``, so the stored list ends
        up as long as the shorter of the two.
        """
        updated = []
        for total, increment in zip(self.data, args):
            updated.append(total + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to 0.0, keeping the current number of slots."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        """Return the total stored at position ``idx``."""
        return self.data[idx]


def accuracy(y_hat, y):
    """Count how many predictions agree with the labels.

    Args:
        y_hat: Either predicted class indices, or a tensor with more than one
            dimension and more than one column, in which case the argmax
            along dimension 1 is used as the prediction.
        y: The reference labels.

    Returns:
        The number of matching entries, as a Python float.
    """
    has_class_scores = y_hat.ndim > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(dim=1) if has_class_scores else y_hat
    # Cast the predictions to the label dtype before comparing.
    matches = predicted.to(y.dtype).eq(y)
    # Cast the boolean mask back to the label dtype so it can be summed.
    return float(matches.to(y.dtype).sum())


def train_epoch_ch3(net, train_iter, loss, updater):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: The model; switched to training mode when it is a
            ``torch.nn.Module``.
        train_iter: Iterable yielding ``(X, y)`` mini-batches.
        loss: Callable applied as ``loss(net(X), y)``. The result is reduced
            with ``.mean()`` / ``.sum()`` here, so it is presumably expected
            to hold one value per example (verify against the caller).
        updater: Either a ``torch.optim.Optimizer`` or a callable that is
            invoked with the batch size after ``backward()``. On the callable
            path no gradients are zeroed here, so the callable presumably
            has to take care of that itself (verify).

    Returns:
        ``(summed loss / number of labels, correct predictions / number of
        labels)`` accumulated over the whole pass.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed loss, number of correct predictions, number of labels.
    totals = Accumulator(3)
    for features, labels in train_iter:
        outputs = net(features)
        batch_loss = loss(outputs, labels)
        if uses_torch_optimizer:
            # Built-in optimiser: back-propagate the mean loss.
            updater.zero_grad()
            batch_loss.mean().backward()
            updater.step()
        else:
            # Custom updater: back-propagate the summed loss and pass the
            # batch size to the updater.
            batch_loss.sum().backward()
            updater(features.shape[0])
        totals.add(float(batch_loss.sum()),
                   accuracy(outputs, labels),
                   labels.numel())
    loss_sum, num_correct, num_seen = totals[0], totals[1], totals[2]
    return loss_sum / num_seen, num_correct / num_seen


def evaluate_accuracy(net, data_iter):
    """Return the fraction of correct predictions of ``net`` on ``data_iter``.

    Args:
        net: The model; switched to evaluation mode when it is a
            ``torch.nn.Module``.
        data_iter: Iterable yielding ``(X, y)`` mini-batches.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()  # put the model into evaluation mode
    # Slots: number of correct predictions, number of predictions.
    tally = Accumulator(2)
    with torch.no_grad():
        for features, labels in data_iter:
            num_correct = accuracy(net(features), labels)
            tally.add(num_correct, labels.numel())
    return tally[0] / tally[1]


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train ``net`` for ``num_epochs`` epochs, reporting after each one.

    Each epoch runs ``train_epoch_ch3`` on ``train_iter`` and then
    ``evaluate_accuracy`` on ``test_iter``, and prints the training metrics
    tuple followed by the test accuracy. Nothing is returned.
    """
    for _ in range(num_epochs):
        epoch_stats = train_epoch_ch3(net, train_iter, loss, updater)
        held_out_acc = evaluate_accuracy(net, test_iter)
        print(epoch_stats, held_out_acc)

# Train. Each printed line is (training loss, training accuracy) followed by
# the test accuracy for one epoch.
# NOTE(review): the recorded output already starts at loss ~0.44 / accuracy
# ~0.85 in its first line and the cell execution counts are not consecutive,
# which suggests this cell was re-run on an already trained `net`. Re-run the
# network-definition cell first to train from scratch -- confirm.
train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)

(0.44337307516733804, 0.84965) 0.8341
(0.4389319231033325, 0.8499) 0.8324
(0.43565808022816976, 0.8519166666666667) 0.8336
(0.4333226386388143, 0.8518) 0.8355
(0.43013443806966145, 0.85365) 0.8333
(0.4285124114990234, 0.85425) 0.8381
(0.42558628187179565, 0.8546666666666667) 0.8369
(0.4241584191640218, 0.8554833333333334) 0.8372
(0.4224583350499471, 0.85545) 0.8371
(0.4198314774195353, 0.85705) 0.8379


In [26]:
# Take the first mini-batch from the test loader and show the shapes of its
# inputs and labels.
X,y=next(iter(test_iter))
X.shape,y.shape

(torch.Size([256, 1, 28, 28]), torch.Size([256]))

In [33]:
# Accuracy on that single batch: the share of argmax predictions that equal
# the labels.
(torch.argmax(net(X),axis=1)==y).sum()/len(y)

tensor(0.8555)