In [1]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from d2l import torch as d2l
# 定义网络
net = nn.Sequential(
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 128*2, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(128*2, 192*2, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(192*2, 192*2, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(192*2, 128*2, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    nn.Linear(6*6*256, 2048*2), nn.ReLU(), nn.Dropout(p=0.5),
    nn.Linear(2048*2, 2048*2), nn.ReLU(), nn.Dropout(p=0.5),
    nn.Linear(2048*2, 10), nn.ReLU(),
)
# 下载并配置数据集
trans = [transforms.ToTensor()]
trans.insert(0, transforms.Resize(224))
trans = transforms.Compose(trans)
train_dataset = datasets.FashionMNIST(root='./dataset', train=True,
                                      transform=trans, download=True)
test_dataset = datasets.FashionMNIST(root='./dataset', train=False,
                                     transform=trans, download=True)

# 配置数据加载器
batch_size = 64
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size, shuffle=True)
def train(net, train_iter, test_iter, epochs, lr, device):
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print(f'Training on:[{device}]')
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    timer, num_batches = d2l.Timer(), len(train_iter)
    for epoch in range(epochs):
        # 训练损失之和，训练准确率之和，样本数
        metric = d2l.Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % (num_batches // 30) == 0 or i == num_batches - 1:
                print(f'Epoch: {epoch+1}, Step: {i+1}, Loss: {train_l:.4f}')
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        print(
            f'Train Accuracy: {train_acc*100:.2f}%, Test Accuracy: {test_acc*100:.2f}%')
    print(f'{metric[2] * epochs / timer.sum():.1f} examples/sec '
          f'on: [{str(device)}]')
    torch.save(net.state_dict(),
               f"./model/AlexNet_Epoch{epochs}_Accuracy{test_acc*100:.2f}%.pth")
epochs, lr = 10, 0.1
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# 训练模型
train(net, train_loader, test_loader, epochs, lr, device)
# 加载保存的模型
# net.load_state_dict(torch.load("./model/AlexNet_Epoch20_Accuracy91.38%.pth"))
def show_predict():
    # 预测结果图像可视化
    net.to(device)
    loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=True)
    plt.figure(figsize=(12, 8))
    name = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat',
            'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
    for i in range(9):
        (images, labels) = next(iter(loader))
        images = images.to(device)
        labels = labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        title = f"Predicted: {name[int(predicted[0])]}, True: {name[int(labels[0])]}"
        plt.subplot(3, 3, i + 1)
        plt.imshow(images.cpu()[0].squeeze())
        plt.title(title)
        plt.xticks([])
        plt.yticks([])
    plt.show()


show_predict()


Training on:[cuda:0]


  return F.conv2d(input, weight, bias, self.stride,


Epoch: 1, Step: 31, Loss: 2.2880
Epoch: 1, Step: 62, Loss: 2.2421
Epoch: 1, Step: 93, Loss: 2.2029
Epoch: 1, Step: 124, Loss: 2.1749
Epoch: 1, Step: 155, Loss: 2.1425
Epoch: 1, Step: 186, Loss: 2.1064
Epoch: 1, Step: 217, Loss: 2.0634
Epoch: 1, Step: 248, Loss: 2.0107
Epoch: 1, Step: 279, Loss: 1.9637
Epoch: 1, Step: 310, Loss: 1.9274
Epoch: 1, Step: 341, Loss: 1.8985
Epoch: 1, Step: 372, Loss: 1.8699
Epoch: 1, Step: 403, Loss: 1.8488
Epoch: 1, Step: 434, Loss: 1.8259
Epoch: 1, Step: 465, Loss: 1.8069
Epoch: 1, Step: 496, Loss: 1.7901
Epoch: 1, Step: 527, Loss: 1.7752
Epoch: 1, Step: 558, Loss: 1.7611
Epoch: 1, Step: 589, Loss: 1.7475
Epoch: 1, Step: 620, Loss: 1.7353
Epoch: 1, Step: 651, Loss: 1.7233
Epoch: 1, Step: 682, Loss: 1.7139
Epoch: 1, Step: 713, Loss: 1.7052
Epoch: 1, Step: 744, Loss: 1.6976
Epoch: 1, Step: 775, Loss: 1.6898
Epoch: 1, Step: 806, Loss: 1.6824
Epoch: 1, Step: 837, Loss: 1.6749
Epoch: 1, Step: 868, Loss: 1.6688
Epoch: 1, Step: 899, Loss: 1.6634
Epoch: 1, Step: 9

RuntimeError: Parent directory ./model does not exist.