In [1]:
import torch
from torch import tensor
import numpy as np
from tqdm import tqdm
import torchvision
import torchvision.transforms as transforms

# 1. Load the Fashion-MNIST dataset (using its predefined train/test split).
#    Every image is converted to a tensor by ToTensor() when it is read.
mnist_train = torchvision.datasets.FashionMNIST(
    root='~/Datasets/FashionMNIST',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
mnist_test = torchvision.datasets.FashionMNIST(
    root='~/Datasets/FashionMNIST',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# 2. Wrap both splits in DataLoaders that yield mini-batches.
#    Only the training loader shuffles its samples.
batch_size = 256
train_iter = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    num_workers=0,
    shuffle=True,
)
test_iter = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=batch_size,
    num_workers=0,
    shuffle=False,
)

# 3. Build the model by hand.
num_inputs = 784
num_outputs = 10  # 10 classes in total

# 3.1 Parameter initialisation.
# Both parameters are sampled from a normal distribution (std 0.1 for the
# weights, 0.01 for the bias) and marked in place so that autograd records
# gradients for them.
W = torch.normal(
    0, 0.1, size=(num_inputs, num_outputs), dtype=torch.float32
).requires_grad_()  # weight matrix, 784 x 10
b = torch.normal(
    0, 0.01, size=(1, num_outputs), dtype=torch.float32
).requires_grad_()  # bias row, 1 x 10

# 3.2 Softmax regression model
def softmax(X):
    """Apply a numerically stable softmax along dim 1 of ``X``.

    Args:
        X: 2-D tensor of scores; each row is normalised independently.

    Returns:
        Tensor with the same shape as ``X`` whose rows sum to 1.
    """
    # Subtracting the per-row maximum leaves the result unchanged
    # (softmax is shift-invariant) but keeps exp() from overflowing to inf,
    # which would otherwise produce NaN through inf / inf.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = shifted.exp()  # element-wise exponential
    partition = X_exp.sum(dim=1, keepdim=True)  # per-row sum, shape n x 1
    return X_exp / partition  # the n x 1 divisor broadcasts across each row
def net(X):
    """Run the softmax-regression forward pass on a batch ``X``.

    The batch is viewed as rows of ``num_inputs`` features, mapped through
    the affine layer ``W``/``b``, and passed through ``softmax``.
    """
    flat = X.view((-1, num_inputs))
    scores = torch.mm(flat, W) + b
    return softmax(scores)

# 3.3 Cross-entropy loss
def loss(y_hat, y):
    """Return the per-sample cross-entropy loss as an (n, 1) tensor.

    Args:
        y_hat: 2-D tensor of predicted class probabilities, one row per sample.
        y: integer class labels, one per sample.
    """
    label_column = y.view(-1, 1)
    # Select, for every row, the probability assigned to its true class.
    true_class_prob = torch.gather(y_hat, 1, label_column)
    return -true_class_prob.log()

# 3.4 Optimizer
def sgd(params, lr, batch_size):
    """Apply one mini-batch SGD step to ``params`` in place.

    Each parameter is updated as ``param -= lr * param.grad / batch_size``.

    Args:
        params: iterable of tensors to update.
        lr: learning rate.
        batch_size: divisor applied to the gradient (the gradient is expected
            to come from a loss summed over the batch).
    """
    # no_grad() lets the leaf tensors be modified in place without the update
    # itself being recorded by autograd; it replaces the legacy `.data` access.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                # Parameter did not take part in the last backward pass.
                continue
            param -= lr * param.grad / batch_size

# 4. Compute classification accuracy
def evaluate_accurcy(data_iter, net):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable yielding ``(x, y)`` batches, where ``y`` holds the
            integer class labels.
        net: callable mapping a batch ``x`` to per-class scores; the predicted
            class is the argmax along dim 1.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when ``data_iter`` yields no samples.
    """
    right_count, all_num = 0.0, 0
    # Evaluation needs no gradients, so avoid building autograd graphs.
    with torch.no_grad():
        for x, y in data_iter:
            right_count += (net(x).argmax(dim=1) == y).float().sum().item()
            all_num += y.shape[0]
    if all_num == 0:
        # Nothing was evaluated; avoid a ZeroDivisionError.
        return 0.0
    return right_count / all_num

# argmax() and argmin() return the index of the maximum and the minimum value, respectively; `dim` selects the dimension to search along.
# 5. Train the model
lr = 0.1
num_epochs = 5
for epoch in range(num_epochs):
    # Per-epoch running totals: correct predictions, samples seen, summed loss.
    train_right_sum, train_all_sum, train_loss_sum = 0.0, 0, 0.0
    for X, y in tqdm(train_iter):  # tqdm renders a progress bar over the batches
        y_hat = net(X)
        batch_loss = loss(y_hat, y).sum()  # loss summed over the batch
        batch_loss.backward()  # compute gradients
        # Normalise by the number of samples actually in this batch: the final
        # batch of an epoch can hold fewer than `batch_size` samples, and
        # dividing by the constant would shrink its update.
        sgd([W, b], lr, y.shape[0])  # parameter update
        # Reset accumulated gradients before the next backward pass.
        W.grad.zero_()
        b.grad.zero_()
        train_loss_sum += batch_loss.item()
        # Training accuracy uses the predictions made before this step's update.
        train_right_sum += (y_hat.argmax(dim=1) == y).sum().item()
        train_all_sum += y.shape[0]
    test_acc = evaluate_accurcy(test_iter, net)  # accuracy on the test set
    print('epoch %d, loss %.4f' % (epoch+1, train_loss_sum/train_all_sum))
    print('train accuracy %.3f, test accuracy %.3f' % (train_right_sum/train_all_sum, test_acc))


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:12<00:00, 19.03it/s]
  1%|▋                                                                                 | 2/235 [00:00<00:14, 15.67it/s]

epoch 1, loss 0.8011
train accuracy 0.741, test accuracy 0.786


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:17<00:00, 13.27it/s]
  1%|▋                                                                                 | 2/235 [00:00<00:17, 13.46it/s]

epoch 2, loss 0.5776
train accuracy 0.809, test accuracy 0.808


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:22<00:00, 10.68it/s]
  0%|                                                                                          | 0/235 [00:00<?, ?it/s]

epoch 3, loss 0.5339
train accuracy 0.820, test accuracy 0.817


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:21<00:00, 10.87it/s]
  1%|▋                                                                                 | 2/235 [00:00<00:19, 12.15it/s]

epoch 4, loss 0.5082
train accuracy 0.829, test accuracy 0.820


100%|████████████████████████████████████████████████████████████████████████████████| 235/235 [00:15<00:00, 15.44it/s]


epoch 5, loss 0.4915
train accuracy 0.833, test accuracy 0.826
