In [None]:
# 使用gpu训练的第一种方式
# 调用：
#     网络模型
#     数据
#     损失函数
# 的.cuda()

In [40]:
import torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from collections import OrderedDict

In [None]:
class LeNet_5(nn.Module):
    def __init__(self):
        super(LeNet_5, self).__init__()
        self.model1 = nn.Sequential(OrderedDict([
            ('conv1', nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2)),
            ('pool1', nn.MaxPool2d(kernel_size=2)),
            ('conv2', nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2)),
            ('pool2', nn.MaxPool2d(kernel_size=2)),
            ('conv3', nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding=2)),
            ('pool3', nn.MaxPool2d(kernel_size=2)),
            ('flatten', nn.Flatten()),
            ('fc1', nn.Linear(1024, 64)),  # 注意：1024 = 64通道 × 4 × 4（针对输入32x32）
            ('fc2', nn.Linear(64, 10))
        ]))

    def forward(self, x):
        return self.model1(x)

In [11]:
# 准备数据集
train_data = torchvision.datasets.CIFAR10("../datasets/CIFAR10/", train = True, 
                                          transform = torchvision.transforms.ToTensor(), download = True)
val_data = torchvision.datasets.CIFAR10("../datasets/CIFAR10/", train = False, 
                                          transform = torchvision.transforms.ToTensor(), download = True)

# 如何查看数据集大小
train_data_size = len(train_data)
val_data_size = len(val_data)
print("训练数据集长度为：{}".format(train_data_size))
print("测试数据集长度为：{}".format(val_data_size))

# 加载数据集
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size = 64)
val_dataloader = torch.utils.data.DataLoader(val_data, batch_size = 64)

Files already downloaded and verified
Files already downloaded and verified
训练数据集长度为：50000
测试数据集长度为：10000


In [21]:
# 搭建神经网络:网络见 Model_save.py
# 创建网络模型
# 对网络模型调用.cuda()
model = LeNet_5()
if torch.cuda.is_available():
    model = model.cuda()
# input = torch.ones((64, 3, 32, 32))
# print(model(input).shape)

In [22]:
# 创建损失函数
# 对损失函数调用.cuda()
loss_fn = nn.CrossEntropyLoss()
if torch.cuda.is_available():
    loss_fn = loss_fn.cuda()

In [23]:
# 定义优化器
# learning_rate = 0.01
# 1e-2 == 1 * (10)^(-2)
learning_rate = 1e-2
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In [50]:
# 设置训练网络的一些参数
# 记录训练的次数
total_train_step = 0
# 记录测试的次数
total_test_step = 0
# 记录训练的次数
epoch = 10
# 绘制训练图像
writer = SummaryWriter("../logs/17_Model_training")

In [51]:
# 开始训练
for i in range(epoch):
    print("-----------------第 {} 轮训练开始-----------------".format(i + 1))

    # 训练步骤开始
    model.train()
    for data in train_dataloader:
        # 对数据调用.cuda()
        imgs, targets = data
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            targets = targets.cuda()
        outputs = model(imgs)
        # print(outputs.shape)
        # print(targets.shape)
        loss = loss_fn(outputs, targets)

        # 优化优化模型
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # 查看训练结果
        total_train_step += 1
        if total_train_step % 100 == 0:
            print("训练次数:{}, loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)


    model.eval()
    # 每一轮训练结束之后在测试集上验证模型的效果，对模型进行评估，在测试集上不对模型进行调优
    # 在with下不会再进行梯度的计算
    total_test_loss = 0
    with torch.no_grad():
        right_sum = 0
        for data in val_dataloader:
            # 对数据调用.cuda()
            imgs, targets = data
            if torch.cuda.is_available():
                imgs = imgs.cuda()
                targets = targets.cuda()
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss += loss.item()
            outputs = torch.argmax(outputs, dim = 1)
            # print((outputs == targets).sum())
            # print(outputs)
            right_sum += (outputs == targets).sum()
        print("整体测试集上的loss:{}".format(total_test_loss)) 
        writer.add_scalar("test_loss", total_test_loss, total_test_step)
        print("整体测试集上的正确率:{}".format(right_sum / 10000)) # 测试集有10000张图片
        writer.add_scalar("test_accuracy", right_sum / 10000, total_test_step)
        total_test_step += 1

    # 每一轮训练完需要保存当前模型
    # 方法1： 
    # torch.save(model, "../Models/LeNet_5/LeNet_5_No_{}_Loss_{}.pth".format(epoch, total_test_step))
    # 方法2（更推荐）：
    torch.save(model.state_dict(), "../Models/LeNet_5/LeNet_5_No_{}_Loss_{}.pth".format(epoch, total_test_step))
    print("模型已保存")


writer.close()

-----------------第 1 轮训练开始-----------------
训练次数:100, loss:0.8016112446784973
训练次数:200, loss:0.8249855637550354
训练次数:300, loss:1.0304747819900513
训练次数:400, loss:0.924564003944397
训练次数:500, loss:0.891964316368103
训练次数:600, loss:0.8696330189704895
训练次数:700, loss:0.9919231534004211
整体测试集上的loss:166.91454285383224
整体测试集上的正确率:0.6330000162124634
模型已保存
-----------------第 2 轮训练开始-----------------
训练次数:800, loss:0.8474284410476685
训练次数:900, loss:0.745090663433075
训练次数:1000, loss:1.0707513093948364
训练次数:1100, loss:0.9083277583122253
训练次数:1200, loss:0.94816654920578
训练次数:1300, loss:0.7184140682220459
训练次数:1400, loss:0.7533657550811768
训练次数:1500, loss:0.9320236444473267
整体测试集上的loss:165.18901270627975
整体测试集上的正确率:0.636900007724762
模型已保存
-----------------第 3 轮训练开始-----------------
训练次数:1600, loss:0.5900333523750305
训练次数:1700, loss:0.7078320980072021
训练次数:1800, loss:0.6901295185089111
训练次数:1900, loss:0.9763226509094238
训练次数:2000, loss:1.0766396522521973
训练次数:2100, loss:0.6616396903991699
训练次数:2200, los

KeyboardInterrupt: 