In [18]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.utils.data as Data
import torchvision
import torchvision.transforms as transforms
import numpy

In [19]:
# 读入数据
mnist_train = torchvision.datasets.FashionMNIST(root='E:/Datasets/FashionMNIST', train=True, transform=transforms.ToTensor())
mnist_test = torchvision.datasets.FashionMNIST(root='E:/Datasets/FashionMNIST', train=False, transform=transforms.ToTensor())

batch_size = 256
train_iter = Data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
test_iter = Data.DataLoader(mnist_test, batch_size=batch_size, shuffle=True)

In [20]:
# 定义模型
# 注意：nn.CrossEntropyLoss()中有softmax运算，所以模型中不需要再重复
class SoftmaxNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(SoftmaxNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
    
    def forward(self, x):
        y = self.linear(x.view(x.shape[0], -1))
        return y

In [25]:
# 建立模型
num_inputs = 28 * 28
num_outputs = 10

net = SoftmaxNet(num_inputs, num_outputs)
init.normal_(net.linear.weight, mean=0, std=0.01)
init.constant_(net.linear.bias, val=0)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [22]:
# 定义损失函数和优化算法
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

In [23]:
# 训练集准确率计算
def evaluate_accuracy(data_iter, net, device):
    acc_sum, n = 0.0, 0
    for x, y in data_iter:
        x = x.to(device)
        y = y.to(device)
        y_hat = net(x)
        acc_sum += (y_hat.argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n

In [26]:
# 训练模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

num_epochs = 5
for epoch in range(num_epochs):
    train_l_sum, train_acc_sum, n, m = 0.0, 0.0, 0, 0
    for x, y in train_iter:
        x = x.to(device)
        y = y.to(device)
        y_hat = net(x)
        l = loss(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()
        
        train_l_sum += l.item()
        train_acc_sum += (net(x).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
        m += 1
    test_acc = evaluate_accuracy(test_iter, net, device)
    print(f'epoch {epoch + 1: d}, loss {train_l_sum / m: .2f}, train_acc {train_acc_sum / n: .2f}, test_acc {test_acc: .2f}')

epoch  1, loss  2.29, train_acc  0.12, test_acc  0.12
epoch  2, loss  2.29, train_acc  0.12, test_acc  0.12
epoch  3, loss  2.29, train_acc  0.12, test_acc  0.12
epoch  4, loss  2.29, train_acc  0.12, test_acc  0.12
epoch  5, loss  2.29, train_acc  0.12, test_acc  0.12
