In [3]:
import torch
import torchvision
import numpy as np
import sys

## 3.6.1 获取和读取数据

In [4]:
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST',
                            num_workers=None):
    """Download Fashion-MNIST (if needed) and wrap both splits in DataLoaders.

    Args:
        batch_size: Number of samples per mini-batch, used for both loaders.
        resize: Optional size forwarded to ``torchvision.transforms.Resize``.
            When falsy, no resize transform is added.
        root: Directory the dataset is downloaded to and read from.
        num_workers: Number of DataLoader worker processes. ``None`` (the
            default) keeps the original behaviour: 0 on Windows, 4 elsewhere.

    Returns:
        A ``(train_iter, test_iter)`` tuple of ``torch.utils.data.DataLoader``
        objects; only the training loader shuffles its samples.
    """
    steps = []
    if resize:
        steps.append(torchvision.transforms.Resize(size=resize))
    steps.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root,
        train=True,
        download=True,
        transform=transform,
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root,
        train=False,
        download=True,
        transform=transform,
    )

    if num_workers is None:
        # 0 means no extra worker processes are used to speed up data loading.
        num_workers = 0 if sys.platform.startswith('win') else 4

    train_iter = torch.utils.data.DataLoader(
        mnist_train,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
    )
    test_iter = torch.utils.data.DataLoader(
        mnist_test,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )
    return train_iter, test_iter

In [5]:
# Mini-batch size; the same value is later handed to the training loop.
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

## 3.6.2 初始化模型参数

In [6]:
# Every input is flattened to 784 features (see net); there are 10 output classes.
num_inputs, num_outputs = 784, 10

device = torch.device("cuda")

# Weights: Gaussian noise (mean 0, std 0.01) drawn with NumPy, stored as float32.
W = torch.tensor(
    np.random.normal(0, 0.01, (num_inputs, num_outputs)),
    dtype=torch.float,
)

# Biases start at zero.
b = torch.zeros(num_outputs, dtype=torch.float)

In [7]:
# Turn on gradient tracking in place for both parameters
# (requires_grad_() defaults to requires_grad=True).
W.requires_grad_()
b.requires_grad_()

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

## 3.6.3 实现softmax运算

In [8]:
# Toy 2x3 matrix used to show how `dim` and `keepdim` affect a sum.
X = torch.tensor([[1, 2, 3], [4, 5, 6]])

# dim=0 adds down each column, dim=1 adds across each row;
# keepdim=True keeps the reduced axis with size 1.
print(torch.sum(X, dim=0, keepdim=True))
print(torch.sum(X, dim=1, keepdim=True))

tensor([[5, 7, 9]])
tensor([[ 6],
        [15]])


In [9]:
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Args:
        X: Tensor whose dimension 1 indexes the classes; each row is
            normalised independently.

    Returns:
        Tensor of the same shape as ``X`` whose rows are non-negative and
        sum to 1.
    """
    # Softmax is invariant to adding a constant to a row, so shift each row by
    # its maximum first: the largest exponent becomes 0 and exp() cannot
    # overflow to inf (which would turn the result into NaN).
    # The shift is detached because it does not affect the gradient.
    row_max = X.max(dim=1, keepdim=True)[0].detach()
    X_exp = (X - row_max).exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition

In [10]:
# Sanity check on random input: each row of the softmax output should sum to 1.
X = torch.rand(2, 5)
X_prob = softmax(X)

print(X_prob, torch.sum(X_prob, dim=1))

tensor([[0.2786, 0.1917, 0.1661, 0.2445, 0.1192],
        [0.2765, 0.1720, 0.1517, 0.1750, 0.2247]]) tensor([1.0000, 1.0000])


## 3.6.4 定义模型

In [11]:
def net(X):
    """Softmax-regression forward pass.

    Reshapes ``X`` into rows of ``num_inputs`` features, applies the affine
    map given by the module-level parameters ``W`` and ``b``, and returns the
    row-wise softmax of the result.
    """
    flat = X.view((-1, num_inputs))
    logits = torch.mm(flat, W) + b
    return softmax(logits)

## 3.6.5 定义损失函数

In [12]:
# Use the first GPU when one is available, otherwise fall back to the CPU so
# this small demo also runs on machines without CUDA.
demo_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Two samples, three classes: predicted probabilities and true class indices.
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]]).to(demo_device)

y = torch.LongTensor([0, 2]).to(demo_device)

# gather(1, ...) picks, for every row, the probability of that row's true class.
y_hat.gather(1, y.view(-1, 1))

tensor([[0.1000],
        [0.5000]], device='cuda:0')

In [13]:
def cross_entropy(y_hat, y):
    """Per-sample negative log-probability of the true class.

    Args:
        y_hat: 2-D tensor of predicted class probabilities, one row per sample.
        y: Integer class indices; reshaped to a single column before indexing.

    Returns:
        Tensor with one column holding ``-log(y_hat[i, y[i]])`` for each row.
    """
    true_idx = y.view(-1, 1)
    true_prob = torch.gather(y_hat, 1, true_idx)
    return torch.log(true_prob).neg()

## 3.6.6 计算分类准确率

In [14]:
def accuracy(y_hat, y):
    """Fraction of rows whose highest-scoring column matches the label in ``y``.

    Returns:
        The mean of the per-row matches as a Python float.
    """
    predicted = torch.argmax(y_hat, dim=1)
    hits = torch.eq(predicted, y)
    return hits.float().mean().item()

In [15]:
# Both rows have argmax 2 while the labels are 0 and 2, so only one of two is correct.
print(accuracy(y_hat, y))

0.5


In [16]:
# This function is saved in the d2lzh_pytorch package for later use.
# It will be improved step by step; its full implementation is described in the
# "Image Augmentation" section.
def evaluate_accuracy(data_iter, net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds the
            class indices for the samples in ``X``.
        net: Callable mapping a batch ``X`` to per-class scores with the
            classes along dimension 1.

    Returns:
        Number of correct predictions divided by the number of samples seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0
    # Evaluation never back-propagates, so disable autograd to avoid building
    # a computation graph for every batch.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n

In [17]:
# Test-set accuracy of the model before the training cell below has run.
print(evaluate_accuracy(test_iter, net))

0.0716


## 3.6.7 训练模型

In [18]:
def sgd(params, lr, batch_size):
    """Apply one mini-batch SGD step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.

    Args:
        params: Iterable of tensors to update; entries whose ``grad`` is
            ``None`` are skipped.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient.
    """
    # To stay consistent with the original book the gradient is divided by
    # batch_size here. That is normally unnecessary, because PyTorch losses
    # usually already average over the batch dimension.
    with torch.no_grad():
        # no_grad() lets the parameters be modified in place without the
        # update being recorded by autograd (replaces the older `.data` idiom).
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size

In [19]:
# Training hyper-parameters: number of epochs and the SGD learning rate.
num_epochs = 20
lr = 0.1


# This function is saved in the d2lzh package for later use.
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Iterable of ``(X, y)`` batches passed to
            ``evaluate_accuracy`` at the end of each epoch.
        loss: Callable ``loss(y_hat, y)``; its result is summed to a scalar.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``sgd`` when no optimizer is given.
        params: Tensors updated by ``sgd`` when no optimizer is given.
        lr: Learning rate for ``sgd`` when no optimizer is given.
        optimizer: Optional object with ``zero_grad()`` and ``step()``; when
            given it is used instead of ``sgd``.

    Raises:
        ValueError: If ``optimizer`` is ``None`` and ``params`` or ``lr`` is
            missing.
    """
    if optimizer is None and (params is None or lr is None):
        raise ValueError("train_ch3 needs either an optimizer or both params and lr")

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Reset the gradients left over from the previous step.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                # Check every parameter individually: before the first
                # backward pass (or for unused parameters) grad is None.
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()

            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                # Used in the "concise implementation of softmax regression" section.
                optimizer.step()

            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

In [20]:
import time


# perf_counter() is a monotonic clock meant for measuring elapsed intervals,
# so the measurement is not affected by system clock adjustments.
start = time.perf_counter()

train_ch3(
    net,
    train_iter,
    test_iter,
    cross_entropy,
    num_epochs,
    batch_size,
    params=[W, b],
    lr=lr,
)

# The printed label means "elapsed time" (in seconds).
print("耗时: ", time.perf_counter() - start)

epoch 1, loss 0.7892, train acc 0.748, test acc 0.791
epoch 2, loss 0.5712, train acc 0.812, test acc 0.807
epoch 3, loss 0.5249, train acc 0.825, test acc 0.812
epoch 4, loss 0.5006, train acc 0.832, test acc 0.824
epoch 5, loss 0.4858, train acc 0.837, test acc 0.827
epoch 6, loss 0.4741, train acc 0.839, test acc 0.832
epoch 7, loss 0.4653, train acc 0.843, test acc 0.828
epoch 8, loss 0.4576, train acc 0.846, test acc 0.833
epoch 9, loss 0.4526, train acc 0.847, test acc 0.833
epoch 10, loss 0.4484, train acc 0.847, test acc 0.833
epoch 11, loss 0.4432, train acc 0.849, test acc 0.830
epoch 12, loss 0.4388, train acc 0.850, test acc 0.836
epoch 13, loss 0.4362, train acc 0.851, test acc 0.835
epoch 14, loss 0.4331, train acc 0.852, test acc 0.836
epoch 15, loss 0.4302, train acc 0.853, test acc 0.837
epoch 16, loss 0.4282, train acc 0.854, test acc 0.839
epoch 17, loss 0.4258, train acc 0.855, test acc 0.837
epoch 18, loss 0.4234, train acc 0.855, test acc 0.834
epoch 19, loss 0.42

## 3.6.8 预测

In [21]:
# Display the repr of the test DataLoader that the prediction loop below iterates over.
test_iter

<torch.utils.data.dataloader.DataLoader at 0x7f59a062fa00>

In [22]:
def get_fashion_mnist_labels(labels):
    """Translate numeric Fashion-MNIST class indices into their text names.

    Args:
        labels: Iterable of values convertible with ``int()``; each is used as
            an index into the list of ten class names.

    Returns:
        A list of class-name strings, in the same order as ``labels``.
    """
    class_names = (
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    )
    names = []
    for label in labels:
        names.append(class_names[int(label)])
    return names

In [23]:
correct_num = 0
error_num = 0

# Inference only: disable autograd so no computation graph is built per batch.
with torch.no_grad():
    for X, y in test_iter:
        pred = net(X).argmax(dim=1)
        # Class names map one-to-one to class indices, so comparing indices
        # gives the same counts as comparing the names returned by
        # get_fashion_mnist_labels, without a Python loop over every sample.
        batch_correct = int((pred == y).sum().item())
        correct_num += batch_correct
        error_num += y.shape[0] - batch_correct

print("correct_num: %s" % correct_num)
print("error_num: %s" % error_num)
# The printed label means "accuracy".
print("正确率: %s" % (correct_num / (correct_num + error_num)))

correct_num: 8355
error_num: 1645
正确率: 0.8355
