In [1]:
import torch
import torchvision
import numpy as np
import sys
import d2l.torch as d2l


1.获取和读取数据

In [2]:
batch_size = 256
train_iter,test_iter = d2l.load_data_fashion_mnist(batch_size)

100%|██████████| 26.4M/26.4M [00:02<00:00, 11.4MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 111kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 2.72MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 19.7MB/s]


In [3]:
# 你可以试试不同的批量大小
small_batch = 32
medium_batch = 128
large_batch = 512

print("不同批量大小的批次数量：")
print(f"批量大小{small_batch}: 训练集{60000//small_batch}批次, 测试集{10000//small_batch}批次")
print(f"批量大小{medium_batch}: 训练集{60000//medium_batch}批次, 测试集{10000//medium_batch}批次")
print(f"批量大小{large_batch}: 训练集{60000//large_batch}批次, 测试集{10000//large_batch}批次")

不同批量大小的批次数量：
批量大小32: 训练集1875批次, 测试集312批次
批量大小128: 训练集468批次, 测试集78批次
批量大小512: 训练集117批次, 测试集19批次


2.初始化模型参数

In [5]:
num_inputs = 784
num_outputs = 10
w = torch.tensor(np.random.normal(0,0.01,(num_inputs, num_outputs)),dtype = torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)
w.requires_grad_(True)
b.requires_grad_(True)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], requires_grad=True)

In [6]:
#tensor按维度计算
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
print(a.sum(dim = 0,keepdim=True))
print(a.sum(dim = 1,keepdim=True))

tensor([[5, 7, 9]])
tensor([[ 6],
        [15]])


3.实现Softmax运算

In [8]:
def softmax(X):
    X_exp = X.exp()
    partition = X_exp.sum(dim = 1, keepdim = True)
    return X_exp / partition

X = torch.rand((2,5))
print(X)
X_prob = softmax(X)
print(X_prob, X_prob.sum(dim = 1))

tensor([[0.9075, 0.9583, 0.8524, 0.5504, 0.0106],
        [0.4688, 0.7430, 0.5210, 0.7046, 0.4097]])
tensor([[0.2435, 0.2562, 0.2305, 0.1704, 0.0993],
        [0.1793, 0.2359, 0.1889, 0.2270, 0.1690]]) tensor([1.0000, 1.0000])


4.定义模型

In [9]:
def net(X):
    return softmax(torch.mm(X.view(-1,num_inputs),w) + b)

5.定义损失函数

In [11]:
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])  # 预测概率
y = torch.LongTensor([0, 2])  # 真实标签
result = y_hat.gather(1, y.view(-1, 1))
print(result)

def cross_entropy(y_hat, y):
    return - torch.log(y_hat.gather(1,y.view(-1,1)))

tensor([[0.1000],
        [0.5000]])


6.计算分类准确率

In [14]:
def accuracy(y_hat, y):
    return (y_hat.argmax(dim = 1) == y).float().mean().item()
print(accuracy(y_hat,y))

#模型 net 在数据集 data_iter 上的准确率
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
print(evaluate_accuracy(test_iter, net))

0.5
0.0422


7.训练模型

In [18]:
# 添加SGD优化函数定义
def sgd(params, lr, batch_size):
    """小批量随机梯度下降算法"""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size

num_epochs,lr = 5, 0.1
def train_ch3(net,train_iter,test_iter,loss,num_epochs,batch_size,params=None,lr=None,optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X,y in train_iter:
            y_hat = net(X)
            l = loss(y_hat,y).sum()
            # 梯度清零
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params,lr,batch_size)
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'% (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [w, b], lr)

epoch 1, loss 0.7879, train acc 0.748, test acc 0.790
epoch 2, loss 0.5716, train acc 0.812, test acc 0.810
epoch 3, loss 0.5257, train acc 0.825, test acc 0.820
epoch 4, loss 0.5009, train acc 0.833, test acc 0.823
epoch 5, loss 0.4857, train acc 0.836, test acc 0.827
