In [1]:
import torch
import torchvision
import numpy as np
import sys

## 3.6.1 获取和读取数据

In [70]:
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Build the Fashion-MNIST training and test data loaders.

    Args:
        batch_size: Number of samples per mini-batch, used for both loaders.
        resize: Optional size handed to ``torchvision.transforms.Resize``;
            no resizing step is added when this is falsy.
        root: Dataset directory passed to ``torchvision.datasets.FashionMNIST``
            (which is called with ``download=True``).

    Returns:
        ``(train_iter, test_iter)``: the training loader is shuffled, the
        test loader is not.
    """
    # Optional resize first, then conversion to a tensor.
    steps = [torchvision.transforms.Resize(size=resize)] if resize else []
    steps.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(steps)

    def _split(is_train):
        # Both splits share the same root directory and transform pipeline.
        return torchvision.datasets.FashionMNIST(
            root=root, train=is_train, download=True, transform=transform)

    mnist_train = _split(True)
    mnist_test = _split(False)

    # 0 means no extra worker processes are used to read data (Windows);
    # every other platform uses four workers.
    num_workers = 0 if sys.platform.startswith('win') else 4

    train_iter = torch.utils.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(
        mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter

In [71]:
# Mini-batch size shared by the training and test loaders.
batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

## 3.6.2 初始化模型参数

In [72]:
# Every 28 x 28 image is flattened into 784 input features; there are 10 classes.
num_inputs = 28 * 28
num_outputs = 10

# Weights start as small Gaussian noise (mean 0, std 0.01) drawn with NumPy
# and stored as 32-bit floats; biases start at zero.
W = torch.tensor(
    np.random.normal(0, 0.01, (num_inputs, num_outputs)),
    dtype=torch.float,
)
b = torch.zeros(num_outputs, dtype=torch.float)

# Switch on gradient tracking in place. The final expression is left as the
# last line so the cell echoes `b`.
W.requires_grad_(True)
b.requires_grad_(True)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

## 3.6.3 实现softmax运算

在介绍如何定义softmax回归之前，我们先描述一下如何对多维`Tensor`按维度操作。在下面的例子中，给定一个`Tensor`矩阵`X`。我们可以只对其中同一列（`dim=0`）或同一行（`dim=1`）的元素求和，并在结果中保留行和列这两个维度（`keepdim=True`）。

In [2]:
X = torch.tensor([[1, 2, 3], [4, 5, 6]])

# Column sums (dim=0) and row sums (dim=1); keepdim=True keeps both axes.
print(torch.sum(X, dim=0, keepdim=True))
print(torch.sum(X, dim=1, keepdim=True))

# Element-wise e^x, followed by X itself to show it was not modified.
print(torch.exp(X))
print(X)

tensor([[5, 7, 9]])
tensor([[ 6],
        [15]])
tensor([[  2.7183,   7.3891,  20.0855],
        [ 54.5981, 148.4132, 403.4288]])
tensor([[1, 2, 3],
        [4, 5, 6]])


In [3]:
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Each row is exponentiated and divided by its own sum, so every output
    entry is non-negative and each row sums to 1.

    Args:
        X: Tensor whose rows (dim=1) are normalised independently.

    Returns:
        A tensor of the same shape as ``X`` holding the per-row probabilities.
    """
    # Subtract each row's maximum before exponentiating. The common factor
    # exp(-max) cancels between numerator and denominator, so the result is
    # mathematically unchanged, but exp() can no longer overflow to inf
    # (which previously produced NaN via inf / inf for large inputs).
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = shifted.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition

对于随机输入，我们将每个元素变成了非负数，且每一行和为1。

In [4]:
# Random 2 x 5 input drawn from [0, 1).
X = torch.rand(2, 5)
X_prob = softmax(X)

# Show the probabilities and the per-row sums.
print("X_prob: ", X_prob)
print("X_prob.sum(dim=1): ", torch.sum(X_prob, dim=1))

X_prob:  tensor([[0.2421, 0.1958, 0.1768, 0.1116, 0.2737],
        [0.1519, 0.2072, 0.2872, 0.2161, 0.1376]])
X_prob.sum(dim=1):  tensor([1.0000, 1.0000])


## 3.6.4 定义模型

有了softmax运算，我们可以定义上节描述的softmax回归模型了。这里通过`view`函数将每张原始图像改成长度为`num_inputs`的向量。

In [76]:
def net(X):
    """Forward pass of the softmax-regression model: softmax(X_flat @ W + b).

    ``X`` is first viewed as rows of length ``num_inputs``; ``W``, ``b``,
    ``num_inputs`` and ``softmax`` are the notebook-level definitions.
    """
    flat = X.view((-1, num_inputs))
    logits = torch.mm(flat, W) + b
    return softmax(logits)

## 3.6.5 定义损失函数

上一节中，我们介绍了softmax回归使用的交叉熵损失函数。为了得到标签的预测概率，我们可以使用`gather`函数。在下面的例子中，变量`y_hat`是2个样本在3个类别的预测概率，变量`y`是这2个样本的标签类别。通过使用`gather`函数，我们得到了2个样本的标签的预测概率。与3.4节（softmax回归）数学表述中标签类别离散值从1开始逐一递增不同，在代码中，标签类别的离散值是从0开始逐一递增的。

> torch.FloatTensor是32位浮点类型数据，torch.LongTensor是64位整型
> 
> `torch.Tensor`是一个类（默认等同于`torch.FloatTensor`），用于生成单精度浮点类型的张量；而`torch.tensor`是一个函数，会根据传入的数据推断张量的数据类型。

In [77]:
# Predicted probabilities for 2 samples over 3 classes, and their true labels.
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
print("y: ", y)

# For each row, pick the probability assigned to the true class.
res = torch.gather(y_hat, dim=1, index=y.view(-1, 1))

# NOTE(review): this is the log-probability of the true class (values <= 0);
# the cross-entropy loss is the negative of this quantity.
res_cress_entropy = res.log()
print("损失函数计算为: ", res_cress_entropy)

y:  tensor([0, 2])
损失函数计算为:  tensor([[-2.3026],
        [-0.6931]])


In [78]:
# View the label vector as a single column; this is the index layout passed to gather(dim=1, ...).
y.view(-1, 1)

tensor([[0],
        [2]])

In [79]:
# Loss between the predictions y_hat and the labels y
def cross_entropy(y_hat, y):
    """Per-sample cross-entropy loss.

    Args:
        y_hat: Predicted class probabilities, one row per sample.
        y: Integer class label of each sample.

    Returns:
        A column tensor holding ``-log`` of the probability that ``y_hat``
        assigns to each sample's true class.
    """
    # Use the labels as column indices to select the true-class probability.
    picked = y_hat.gather(dim=1, index=y.view(-1, 1))
    return -picked.log()

## 3.6.6 计算分类准确率

In [80]:
# Index of the highest-probability class in each row of y_hat (not echoed: only a cell's last expression is displayed).
y_hat.argmax(dim=1)

# The true labels, echoed as the cell output.
y

tensor([0, 2])

In [81]:
def accuracy(y_hat, y):
    """Fraction of rows in ``y_hat`` whose arg-max class equals the label in ``y``.

    Returns:
        The accuracy as a Python float.
    """
    predicted = y_hat.argmax(dim=1)
    return (predicted == y).float().mean().item()

In [82]:
# With the example y_hat / y above, one of the two predictions matches its label.
print(accuracy(y_hat, y))

0.5


In [83]:
# This function will be improved step by step; its complete implementation
# is described in the "Image Augmentation" section.

# Accuracy computation
def evaluate_accuracy(data_iter, net):
    """Compute the classification accuracy of ``net`` over a data iterator.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches of inputs and labels.
        net: Callable mapping a batch ``X`` to per-class scores, one row per
            sample.

    Returns:
        Number of correctly classified samples divided by the total number
        of samples seen.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    acc_sum, n = 0.0, 0

    # Evaluation never calls backward(), so disable autograd: otherwise a
    # graph is recorded for every batch whenever the model parameters
    # require gradients, wasting memory and time.
    with torch.no_grad():
        for X, y in data_iter:
            # Predicted class = index of the largest network output per row.
            hits = net(X).argmax(dim=1) == y

            # Accumulate the number of correct predictions and of samples.
            acc_sum += hits.float().sum().item()
            n += y.shape[0]

    return acc_sum / n

In [84]:
# Accuracy of the model over the test iterator with the current values of W and b.
print(evaluate_accuracy(test_iter, net))

0.0603


## 3.6.7 训练模型

In [85]:
# Parameter update
def sgd(params, lr, batch_size):
    """Apply one mini-batch stochastic-gradient-descent step in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.

    Args:
        params: Iterable of tensors to update; entries whose ``.grad`` is
            ``None`` (no gradient computed yet) are skipped.
        lr: Learning rate.
        batch_size: Divisor applied to the gradient.
    """
    # The division by batch_size is kept for consistency with the book. It is
    # needed here because the loss is summed over the batch; losses that are
    # already averaged over the batch would not need it.
    #
    # The update must not be recorded by autograd. torch.no_grad() is the
    # supported way to modify a leaf tensor in place, replacing the older
    # `param.data` pattern.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad / batch_size

In [95]:
# Training hyperparameters: number of epochs and the learning rate passed to train_ch3.
num_epochs, lr = 20, 0.1


# This function is saved in the d2lzh package for later use.
def train_ch3(net, train_iter, test_iter, loss,
              num_epochs, batch_size,
              params=None,
              lr=None,
              optimizer=None):
    """Train ``net`` for ``num_epochs`` epochs and print per-epoch statistics.

    Either pass ``optimizer`` (its ``zero_grad``/``step`` are used), or pass
    ``params`` and ``lr`` so the notebook's ``sgd`` function performs the
    update.

    Args:
        net: Callable mapping a batch ``X`` to per-class scores.
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Test data iterator. Currently unused, because test-set
            evaluation is switched off (see the note in the epoch summary).
        loss: Callable ``loss(y_hat, y)`` returning per-sample losses; they
            are summed over the batch before ``backward()``.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Forwarded to ``sgd`` as the gradient divisor.
        params: Tensors to update when no optimizer is supplied.
        lr: Learning rate for ``sgd`` when no optimizer is supplied.
        optimizer: Optional optimizer object; takes precedence over
            ``params``/``lr``.

    Raises:
        ValueError: If neither ``optimizer`` nor both ``params`` and ``lr``
            are supplied.
    """
    # Fail early with a clear message instead of crashing inside the update
    # step after the first backward pass.
    if optimizer is None and (params is None or lr is None):
        raise ValueError(
            "train_ch3 requires either `optimizer` or both `params` and `lr`")

    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0

        for X, y in train_iter:
            # Forward pass and summed batch loss.
            y_hat = net(X)
            l = loss(y_hat, y).sum()

            # Clear gradients left over from the previous step. Each
            # parameter is checked individually, so a parameter without a
            # gradient neither blocks the others nor raises.
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None:
                for param in params:
                    if param.grad is not None:
                        param.grad.zero_()

            # Backward pass.
            l.backward()

            # Parameter update: hand-written SGD or the supplied optimizer
            # (the optimizer path is used by the concise softmax
            # regression implementation).
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()

            # Running totals: loss, correct predictions, samples seen.
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]

        epoch_index = epoch + 1        # 1-based epoch number
        # Test-set evaluation is switched off; -1.0 is a placeholder.
        # To restore it: test_acc = evaluate_accuracy(test_iter, net)
        test_acc = -1.0
        loss_point = train_l_sum / n   # mean training loss per sample
        train_acc = train_acc_sum / n  # training accuracy

        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch_index, loss_point, train_acc, test_acc)
             )

In [96]:
import time


# Measure the wall-clock time of one complete training run.
start = time.time()
train_ch3(net, train_iter, test_iter, cross_entropy,
          num_epochs, batch_size, [W, b], lr)
print("耗时: ", time.time() - start)

epoch 1, loss 0.4119, train acc 0.859, test acc -1.000
epoch 2, loss 0.4112, train acc 0.859, test acc -1.000
epoch 3, loss 0.4096, train acc 0.860, test acc -1.000
epoch 4, loss 0.4086, train acc 0.860, test acc -1.000
epoch 5, loss 0.4081, train acc 0.861, test acc -1.000
epoch 6, loss 0.4070, train acc 0.860, test acc -1.000
epoch 7, loss 0.4056, train acc 0.861, test acc -1.000
epoch 8, loss 0.4051, train acc 0.861, test acc -1.000
epoch 9, loss 0.4051, train acc 0.861, test acc -1.000
epoch 10, loss 0.4035, train acc 0.862, test acc -1.000
epoch 11, loss 0.4032, train acc 0.861, test acc -1.000
epoch 12, loss 0.4022, train acc 0.862, test acc -1.000
epoch 13, loss 0.4013, train acc 0.862, test acc -1.000
epoch 14, loss 0.4006, train acc 0.863, test acc -1.000
epoch 15, loss 0.3998, train acc 0.863, test acc -1.000
epoch 16, loss 0.3992, train acc 0.863, test acc -1.000
epoch 17, loss 0.3993, train acc 0.862, test acc -1.000
epoch 18, loss 0.3975, train acc 0.864, test acc -1.000
e

## 3.6.8 预测

In [91]:
# Echo the test DataLoader object (inspection only).
test_iter

<torch.utils.data.dataloader.DataLoader at 0x7f422f349820>

In [93]:
def get_fashion_mnist_labels(labels):
    """Translate numeric class indices into their text labels.

    Args:
        labels: Iterable of values convertible with ``int()``; each is used
            as an index into the list of ten label names.

    Returns:
        A list with the text label for each entry of ``labels``.
    """
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    names = []
    for index in labels:
        names.append(text_labels[int(index)])
    return names

In [94]:
correct_num = 0
error_num = 0

# Inference only: no backward pass follows, so autograd is disabled to avoid
# recording a graph for every test batch (W and b require gradients).
with torch.no_grad():
    for X, y in test_iter:
        true_labels = get_fashion_mnist_labels(y.numpy())
        pred_labels = get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())

        # titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

        # d2l.show_fashion_mnist(X[0:9], titles[0:9])

        # Tally matching and mismatching predictions for this batch.
        for true, pred in zip(true_labels, pred_labels):
            if pred == true:
                correct_num += 1
            else:
                error_num += 1


print("correct_num: %s" % correct_num)
print("error_num: %s" % error_num)
print("正确率: %s" % (correct_num / (correct_num+ error_num)))

correct_num: 8409
error_num: 1591
正确率: 0.8409
