In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from PIL import Image
from torch.utils.data import DataLoader

In [3]:
# Prepare the MNIST datasets: both splits share one ToTensor transform and
# are downloaded into the same root directory.
to_tensor = torchvision.transforms.ToTensor()
data_root = './data_dir'
train_set = torchvision.datasets.MNIST(root=data_root, train=True, transform=to_tensor, download=True)
test_set = torchvision.datasets.MNIST(root=data_root, train=False, transform=to_tensor, download=True)

In [4]:
# Inspect the datasets: report how many images each split contains.
# Use the built-in len() rather than calling the __len__ dunder directly.
print('训练集图片数量', len(train_set))
print('测试集图片数量', len(test_set))

训练集图片数量 60000
测试集图片数量 10000


In [12]:
# Fetch the first training sample and show the image tensor's shape
# together with its class label.
first_sample = train_set[0]
img, label = first_sample
print(img.shape, label)

torch.Size([1, 28, 28]) 5


In [13]:
# Convert the image tensor back into a PIL image and open it in a viewer.
to_pil = torchvision.transforms.ToPILImage()
pil_img = to_pil(img)
pil_img.show()

In [None]:
# DataLoader is imported in the notebook's top import cell.
# Number of samples per mini-batch, shared by both loaders.
batch_size = 64
# Training loader: shuffled, and the last incomplete batch is dropped so
# every training batch holds exactly batch_size samples.
train_dataloader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True, drop_last=True)
# Test loader: original order, and the final partial batch is kept so every
# test sample is evaluated.
test_dataloader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False, drop_last=False)

In [None]:
# Peek at a single training batch to check the batched tensor shapes.
# An empty loader yields nothing, in which case nothing is printed.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    img, label = first_batch
    print(img.shape, label.shape)

In [None]:
class MNISTNet(nn.Module):
    """Single-layer linear classifier over flattened images.

    Args:
        in_features: Number of values per sample after flattening. Defaults
            to 28 * 28, matching a 1 x 28 x 28 input image.
        num_classes: Number of output scores per sample. Defaults to 10,
            one per class.
    """

    def __init__(self, in_features: int = 28 * 28, num_classes: int = 10):
        super().__init__()
        self.fc = nn.Linear(in_features, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten every sample and apply the linear layer.

        Args:
            x: Batch whose per-sample element count equals ``in_features``,
                e.g. shape [N, 1, 28, 28].

        Returns:
            Tensor of shape [N, num_classes] holding the raw class scores.
        """
        # Reshape [N, 1, 28, 28] into [N, 1 * 28 * 28]. torch.flatten copies
        # when it has to, so non-contiguous inputs (e.g. transposed batches)
        # and empty batches work, whereas Tensor.view(N, -1) raises on both.
        x = torch.flatten(x, start_dim=1)
        return self.fc(x)

In [10]:
# Demo: compare F.cross_entropy against a hand-written computation.
# Note that F.cross_entropy applies log-softmax to its input internally, so
# the already-normalised `input` below is treated as raw scores.
input = F.softmax(torch.rand(size=[1, 4]), dim=1)
print('input', input)
target = torch.randint(low=0, high=4, size=[1])
print('target', target)
loss = F.cross_entropy(input, target)
print('loss', loss)
# Cross entropy by hand: -log(exp(score of the target class) / sum of exp(all scores)).
numerator = torch.exp(input[:, target].float())
denominator = torch.sum(torch.exp(input))
_loss = -torch.log(numerator / denominator)
print('手动计算的交叉熵', _loss)

input tensor([[0.2974, 0.2890, 0.2819, 0.1317]])
target tensor([1])
loss tensor(1.3496)
手动计算的交叉熵 tensor([[1.3496]])


In [None]:
# Device to run on: 'cpu', or 'cuda:0', 'cuda:1', etc. ('cuda:0' is GPU number 0).
# Fall back to the CPU when CUDA is unavailable so the notebook still runs
# on machines without a GPU instead of failing in net.to(device).
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Create the network. A newly created network lives on the CPU by default.
net = MNISTNet()
# Move the network to device. Note that nn.Module.to is an in-place
# operation: it moves the module it is called on.
net.to(device)
# Use the SGD optimizer over net.parameters(), i.e. all trainable
# parameters of net, with learning rate 1e-3.
optimizer = torch.optim.SGD(net.parameters(), lr=1e-3)
# Train for max_epoch epochs, i.e. iterate over the dataset this many times.
max_epoch = 10
# Total number of iterations so far.
iterations = 0