1.模型的保存与加载

In [1]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# This pretrained VGG16 is the model that gets saved to local disk in the cells below.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favor of `weights=` — confirm against the installed version.
vgg16 = torchvision.models.vgg16(pretrained=True, progress=True)

In [5]:
# Save method 1: serialize the whole model object (architecture + parameters).
torch.save(obj=vgg16, f='vgg16_method_1.pth')

In [None]:
# Load the model that was saved with method 1.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which rejects
# whole-model pickles like this one — confirm against the installed version.
model = torch.load(f='vgg16_method_1.pth')
model

In [7]:
# Save method 2 (the officially recommended way): store only the parameters.
# The resulting file is smaller than the one produced by method 1.
vgg16_state = vgg16.state_dict()
torch.save(obj=vgg16_state, f='vgg16_method_2.pth')

In [None]:
# Load the checkpoint written by save method 2.
# torch.load('vgg16_method_2.pth') on its own only yields the parameter dictionary.

# To rebuild the model, define the architecture again and then load the weights into it.
vgg16_weights = torch.load(f='vgg16_method_2.pth')
model_method_2 = torchvision.models.vgg16()
model_method_2.load_state_dict(vgg16_weights)

In [10]:
# Pitfall of save method 1 (saving the whole model object).
# Suppose we have a custom model such as this one.
class MyModel(nn.Module):
    """Minimal custom network: a single 3x3 convolution from 3 to 32 channels."""

    def __init__(self) -> None:
        super().__init__()
        self.conv = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)

    def forward(self, x):
        """Apply the convolution to ``x`` and return its output."""
        features = self.conv(x)
        return features


# Create an instance of the custom model ...
mymodel = MyModel()
# ... and save it with method 1 (the whole object is serialized).
torch.save(obj=mymodel, f='mymodel_method_1.pth')

In [2]:
# Loading a custom model that was saved with method 1 can fail.
# It works in a session where the MyModel class has already been defined; when the
# class is not defined, loading raises an error (reproduced in the output below).
torch.load(f='mymodel_method_1.pth')

AttributeError: Can't get attribute 'MyModel' on <module '__main__'>

2.完整的训练套路

In [2]:
# Prepare the CIFAR-10 train and test splits; both use ToTensor as their transform.
cifar10_kwargs = dict(root='data', transform=transforms.ToTensor(), download=True)
train_dataset = torchvision.datasets.CIFAR10(train=True, **cifar10_kwargs)
test_dataset = torchvision.datasets.CIFAR10(train=False, **cifar10_kwargs)
print(f'train_dataset长度为:{len(train_dataset)}\ntest_dataset长度为:{len(test_dataset)}')

Files already downloaded and verified
Files already downloaded and verified
train_dataset长度为:50000
test_dataset长度为:10000


In [3]:
# Build the DataLoaders; only the training split is shuffled.
BATCH_SIZE = 64
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
"""
loss可以上cuda，model可以上cuda，数据可以在迭代时上cuda
"""
# Instantiate the custom model (its definition lives in model.py).
from model import MySequence

# Use the GPU when one is available and fall back to the CPU otherwise,
# instead of failing on a hard-coded .cuda() call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = MySequence()
model = model.to(device)    # move the model to the compute device

# Loss function and optimizer.
loss = nn.CrossEntropyLoss()
loss = loss.to(device)      # move the loss module to the compute device
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

# Bookkeeping.
total_train_step = 0    # number of optimizer steps taken so far
total_test_step = 0     # number of evaluation passes run so far
epochs = 30

# Log training/evaluation metrics for TensorBoard.
writer = SummaryWriter('logs')
try:
    for epoch in range(epochs):
        # Training phase: enable training-mode behaviour of layers such as
        # dropout / batch norm (a no-op for models that have none).
        model.train()
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outs = model(imgs)
            train_loss = loss(outs, labels)
            train_loss_value = train_loss.item()
            writer.add_scalar('train loss', train_loss_value, total_train_step)

            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()

            total_train_step += 1
            if total_train_step % 100 == 0:
                print(f'total_train_step:{total_train_step}  loss:{train_loss_value:.7f}')

        # Evaluation phase: switch to eval mode and disable gradient tracking,
        # since nothing is optimized here.
        model.eval()
        with torch.no_grad():
            loss_test = 0.0     # sum of the per-batch losses over the test set
            test_correct = 0    # number of correctly classified test samples
            for imgs, labels in test_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                outs = model(imgs)
                test_correct += (outs.argmax(dim=1) == labels).sum().item()
                loss_test += loss(outs, labels).item()
        test_acc = test_correct / len(test_loader.dataset)
        writer.add_scalar('test loss', loss_test, total_test_step)
        writer.add_scalar('test accuracy', test_acc, total_test_step)
        total_test_step += 1
        print(f'--------------------> test acc:{test_acc} test loss total:{loss_test:.7f}')
finally:
    # Flush and close the event file even if training is interrupted.
    writer.close()

total_train_step:100  loss:2.2911484
total_train_step:200  loss:2.3007643
total_train_step:300  loss:2.2574663
total_train_step:400  loss:2.2474570
total_train_step:500  loss:2.1314514
total_train_step:600  loss:2.0383046
total_train_step:700  loss:1.9208713
--------------------> test acc:0.29659998416900635 test loss total:308.2482300
total_train_step:800  loss:1.9407473
total_train_step:900  loss:1.9111099
total_train_step:1000  loss:1.9652811
total_train_step:1100  loss:1.7590046
total_train_step:1200  loss:1.8395827
total_train_step:1300  loss:1.7512043
total_train_step:1400  loss:1.8687602
total_train_step:1500  loss:1.8587788
--------------------> test acc:0.2586999833583832 test loss total:337.0223083
total_train_step:1600  loss:1.6178354
total_train_step:1700  loss:1.7533450
total_train_step:1800  loss:1.7263081
total_train_step:1900  loss:1.6362153
total_train_step:2000  loss:1.4000463
total_train_step:2100  loss:1.7219518
total_train_step:2200  loss:1.4748170
total_train_step

3.模型推断

In [11]:
from PIL import Image

# Same preprocessing as the captured run: resize to 32x32, then convert to a tensor.
preprocess = transforms.Compose([transforms.Resize((32, 32)),
                                 transforms.ToTensor()])

model.eval()
# Run on whichever device the model's parameters currently live on,
# rather than assuming a GPU is present.
model_device = next(model.parameters()).device

# The context manager closes the image file once the tensor has been built.
with Image.open('data/images/2007_000033.jpg') as image:
    # Force three channels so grayscale / RGBA / palette images match the
    # 3-channel input shown in the captured output.
    imgTensor = preprocess(image.convert('RGB')).unsqueeze(0).to(model_device)
print(f'imgTensor {imgTensor.shape}')

with torch.no_grad():
    output = model(imgTensor)
print(f'output {output.shape} class: {output.argmax(-1)}\n{test_dataset.class_to_idx}')

imgTensor torch.Size([1, 3, 32, 32])
output torch.Size([1, 10]) class: tensor([0], device='cuda:0')
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
