In [2]:
import torch
import torchvision
import numpy as np
import sys

print(torch.__version__)

1.13.1+cu117


## 3.6.1 获取和读取数据

In [6]:
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Download the fashion mnist dataset and then load into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
        pass

    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, 
        train=True, 
        download=False, 
        transform=transform
    )
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, 
        train=False, 
        download=False, 
        transform=transform
    )

    if sys.platform.startswith('win'):
        num_workers = 0  # 0表示不用额外的进程来加速读取数据
    else:
        num_workers = 4

    train_iter = torch.utils.data.DataLoader(
        mnist_train, 
        batch_size=batch_size, 
        shuffle=True,
        num_workers=num_workers
    )

    test_iter = torch.utils.data.DataLoader(
        mnist_test, 
        batch_size=batch_size, 
        shuffle=False, 
        num_workers=num_workers
    )
    return train_iter, test_iter

In [7]:
batch_size = 256


dataRoot = '/mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST'
train_iter, test_iter = load_data_fashion_mnist(batch_size, root=dataRoot)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /mnt/g1t/ai_data/Datasets_on_HHD/FashionMNIST/FashionMNIST/raw



## 3.6.2 初始化模型参数

In [8]:
num_inputs = 784
num_outputs = 10

device = torch.device("cuda")

W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), 
                 device=device,
                 dtype=torch.float
                )

b = torch.zeros(num_outputs, 
                device=device,
                dtype=torch.float)

In [9]:
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True) 

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',
       requires_grad=True)

## 3.6.3 实现softmax运算

In [10]:
X = torch.tensor([[1, 2, 3], [4, 5, 6]], 
                )


print(X.sum(dim=0, keepdim=True))
print(X.sum(dim=1, keepdim=True))

tensor([[5, 7, 9]])
tensor([[ 6],
        [15]])


In [11]:
def softmax(X):
    X_exp = X.exp().to(device)
    partition = X_exp.sum(dim=1, keepdim=True).to(device)
    return X_exp / partition

In [12]:
X = torch.rand((2, 5)).to(device)
X_prob = softmax(X).to(device)

print(X_prob, X_prob.sum(dim=1))

tensor([[0.2436, 0.1905, 0.1415, 0.1775, 0.2469],
        [0.2413, 0.2236, 0.1923, 0.1494, 0.1933]], device='cuda:0') tensor([1., 1.], device='cuda:0')


## 3.6.4 定义模型

In [13]:
def net(X):
    X = X.to(device)
    
    return softmax(torch.mm(X.view((-1, num_inputs)).to(device), 
                            W).to(device) + b.to(device)).to(device)

## 3.6.5 定义损失函数

In [14]:
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]]).to(device)

y = torch.LongTensor([0, 2]).to(device)

y_hat.gather(1, y.view(-1, 1))

tensor([[0.1000],
        [0.5000]], device='cuda:0')

In [15]:
# 损失函数
def cross_entropy(y_hat, y):
    y_hat = y_hat.to(device)
    y = y.to(device)
    return - torch.log(y_hat.gather(1, y.view(-1, 1))).to(device)

## 3.6.6 计算分类准确率

In [16]:
def accuracy(y_hat, y):
    return (y_hat.argmax(dim=1) == y).float().mean().item()

In [17]:
print(accuracy(y_hat, y))

0.5


In [18]:
# 本函数已保存在d2lzh_pytorch包中方便以后使用。
# 该函数将被逐步改进：它的完整实现将在“图像增广”一节中描述
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        X = X.to(device)
        y = y.to(device)
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return (acc_sum / n)

In [19]:
print(evaluate_accuracy(test_iter, net))

0.1071


## 3.6.7 训练模型

In [20]:
def sgd(params, lr, batch_size):
    # 为了和原书保持一致，这里除以了batch_size，但是应该是不用除的，因为一般用PyTorch计算loss时就默认已经
    # 沿batch维求了平均了。
    for param in params:
        # 注意这里更改param时用的param.data
        param.data -= lr * param.grad / batch_size 
        pass

In [21]:
num_epochs, lr = 20, 0.1


# 本函数已保存在d2lzh包中方便以后使用
def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            
            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()

            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()

            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()  # “softmax回归的简洁实现”一节将用到
                pass
            
            train_l_sum += l.item()
            
            #target = np.array(train_acc_sum).astype(float)
            #target = torch.from_numpy(target)
            #train_acc_sum = target.to(device)
            # print(train_acc_sum)
            
            y_hat = y_hat.to(device)
            y = y.to(device)
            
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            pass
    
        # test_acc = evaluate_accuracy(test_iter, net)
        test_acc = -1.0
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
        pass
    pass

In [22]:
import time
start = time.time()

# 开始训练
train_ch3(net, 
          train_iter,
          test_iter, 
          cross_entropy, 
          num_epochs, 
          batch_size, 
          [W, b], 
          lr
         )

print("耗时: ", time.time() - start)

epoch 1, loss 0.7868, train acc 0.749, test acc -1.000
epoch 2, loss 0.5700, train acc 0.813, test acc -1.000
epoch 3, loss 0.5248, train acc 0.825, test acc -1.000
epoch 4, loss 0.5010, train acc 0.832, test acc -1.000
epoch 5, loss 0.4850, train acc 0.837, test acc -1.000
epoch 6, loss 0.4734, train acc 0.841, test acc -1.000
epoch 7, loss 0.4657, train acc 0.843, test acc -1.000
epoch 8, loss 0.4580, train acc 0.845, test acc -1.000
epoch 9, loss 0.4526, train acc 0.846, test acc -1.000
epoch 10, loss 0.4470, train acc 0.848, test acc -1.000
epoch 11, loss 0.4438, train acc 0.849, test acc -1.000
epoch 12, loss 0.4400, train acc 0.850, test acc -1.000
epoch 13, loss 0.4358, train acc 0.851, test acc -1.000
epoch 14, loss 0.4342, train acc 0.852, test acc -1.000
epoch 15, loss 0.4307, train acc 0.853, test acc -1.000
epoch 16, loss 0.4288, train acc 0.853, test acc -1.000
epoch 17, loss 0.4262, train acc 0.855, test acc -1.000
epoch 18, loss 0.4244, train acc 0.856, test acc -1.000
e

## 3.6.8 预测

In [23]:
test_iter

<torch.utils.data.dataloader.DataLoader at 0x7f1716442e50>

In [24]:
def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]

In [25]:
correct_num = 0
error_num = 0

for i, value in enumerate(test_iter):
    X, y = value[0].to(device), value[1].to(device)
    
    y = y.cpu()
    # X = X.cpu()
    
    true_labels = get_fashion_mnist_labels(y.numpy())
    pred_labels = get_fashion_mnist_labels(net(X).cpu().argmax(dim=1).numpy())

    # titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

    # d2l.show_fashion_mnist(X[0:9], titles[0:9])
    
    for true, pred in zip(true_labels, pred_labels):
        if pred == true:
            correct_num += 1
        else:
            error_num += 1
        pass

    
print("correct_num: %s" % correct_num)
print("error_num: %s" % error_num)
print("正确率: %s" % (correct_num / (correct_num+ error_num)))

correct_num: 8398
error_num: 1602
正确率: 0.8398
