In [1]:
# 介绍一个函数，在分类问题中常用
# torch.argmax
#     input,                   输入张量
#     dim=None,                指定沿哪个维度找最大值的位置（默认：展平成1维后求最大值索引）
#     keepdim=False            是否保留被归约的维度（为 True 时，输出的 shape 中该 dim 保留且大小为 1）

# 示例：
    # x = torch.tensor([[1, 5, 2],
    #                   [7, 3, 9]])
    # # 按行取最大值的索引（dim=1）
    # row_max_idx = torch.argmax(x, dim=1)
    # print(row_max_idx)  # tensor([1, 2]) -> 每行最大值索引位置
    
    # # 按列取最大值的索引（dim=0）
    # col_max_idx = torch.argmax(x, dim=0)
    # print(col_max_idx)  # tensor([1, 0, 1]) -> 每列最大值索引位置

In [2]:
import torch
import torchvision
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from collections import OrderedDict
from Model_save import *

In [3]:
# Prepare the datasets: the CIFAR-10 training split and the held-out split,
# each image converted with ToTensor(). Files are downloaded to
# ../datasets/CIFAR10/ if they are not already present.
train_data = torchvision.datasets.CIFAR10(
    "../datasets/CIFAR10/",
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)
val_data = torchvision.datasets.CIFAR10(
    "../datasets/CIFAR10/",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=True,
)

# Report the dataset sizes
train_data_size = len(train_data)
val_data_size = len(val_data)
print("训练数据集长度为：{}".format(train_data_size))
print("测试数据集长度为：{}".format(val_data_size))

# Wrap the datasets in DataLoaders (mini-batches of 64).
# The training loader reshuffles the samples every epoch so that SGD does not
# see the exact same batches in the exact same order each time; the evaluation
# loader keeps a fixed order because order does not affect the metrics.
train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_data, batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
训练数据集长度为：50000
测试数据集长度为：10000


In [4]:
# Build the neural network: the architecture itself lives in Model_save.py
# Instantiate the model
model = LeNet_5()
# Optional sanity check (disabled): push a dummy batch through the model and
# print the shape of the result.
# input = torch.ones((64, 3, 32, 32))
# print(model(input).shape)

In [5]:
# Create the loss function: cross-entropy, later called as loss_fn(outputs, targets)
loss_fn = nn.CrossEntropyLoss()

In [6]:
# Define the optimizer: plain SGD over every parameter of the model.
# The learning rate is 0.01, which can equivalently be written 1e-2 (1 * 10^-2).
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [7]:
# Bookkeeping for the training run.
# Total number of epochs to train for
epoch = 10
# total_train_step: optimisation steps taken so far (x-axis of the train-loss curve)
# total_test_step:  evaluation passes completed so far (x-axis of the test curves)
total_train_step, total_test_step = 0, 0
# TensorBoard writer used to plot the training curves
writer = SummaryWriter(log_dir="../logs/17_Model_training")

In [8]:
# Training loop: for every epoch, train on the training set, evaluate on the
# held-out set, log the metrics to TensorBoard and save a checkpoint.
# The try/finally guarantees the TensorBoard writer is flushed and closed even
# when the cell is interrupted part-way through (e.g. KeyboardInterrupt).
try:
    for i in range(epoch):
        print("-----------------第 {} 轮训练开始-----------------".format(i + 1))

        # ---- Training phase ----
        # train()/eval() only matter for certain layers (e.g. Dropout, BatchNorm),
        # but calling them unconditionally keeps the loop correct for any model.
        model.train()
        for data in train_dataloader:
            imgs, targets = data
            outputs = model(imgs)
            loss = loss_fn(outputs, targets)

            # Backpropagate and update the parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report the training loss every 100 optimisation steps
            total_train_step += 1
            if total_train_step % 100 == 0:
                print("训练次数:{}, loss:{}".format(total_train_step, loss.item()))
                writer.add_scalar("train_loss", loss.item(), total_train_step)

        # ---- Evaluation phase ----
        # After each epoch the model is evaluated on the held-out set; no
        # parameter updates happen here, so gradient tracking is disabled.
        model.eval()
        total_test_loss = 0  # sum of the per-batch losses over the whole set
        right_sum = 0        # number of correctly classified images (plain int)
        with torch.no_grad():
            for data in val_dataloader:
                imgs, targets = data
                outputs = model(imgs)
                loss = loss_fn(outputs, targets)
                total_test_loss += loss.item()
                # Predicted class = index of the largest score along dim 1
                outputs = torch.argmax(outputs, dim=1)
                # .item() keeps the counter a Python int instead of a tensor
                right_sum += (outputs == targets).sum().item()

        # Divide by the actual dataset size rather than a hard-coded 10000
        accuracy = right_sum / val_data_size
        print("整体测试集上的loss:{}".format(total_test_loss))
        writer.add_scalar("test_loss", total_test_loss, total_test_step)
        print("整体测试集上的正确率:{}".format(accuracy))
        writer.add_scalar("test_accuracy", accuracy, total_test_step)
        total_test_step += 1

        # Save a checkpoint after every epoch.
        # Option 1 (whole module object): torch.save(model, path)
        # Option 2 (recommended, used here): save only the state_dict.
        # The file name carries the 1-based index of the epoch that just
        # finished (not the total epoch count) plus the summed test loss.
        torch.save(
            model.state_dict(),
            "../Models/LeNet_5/LeNet_5_No_{}_Loss_{}.pth".format(i + 1, total_test_loss),
        )
        print("模型已保存")
finally:
    writer.close()

-----------------第 1 轮训练开始-----------------
训练次数:100, loss:2.2855606079101562
训练次数:200, loss:2.2785608768463135
训练次数:300, loss:2.242176055908203
训练次数:400, loss:2.156294345855713
训练次数:500, loss:2.0516607761383057
训练次数:600, loss:2.056602954864502
训练次数:700, loss:1.9835290908813477
整体测试集上的loss:313.39422714710236
整体测试集上的正确率:0.2784000039100647
模型已保存
-----------------第 2 轮训练开始-----------------
训练次数:800, loss:1.8645280599594116


KeyboardInterrupt: 

In [None]:
# 训练过程中的细节：
# 1:
    # model.train() 和 model.eval() 使用有需要注意的地方，有部分特定层的模型必须使用这两个函数，详情请见pytorch官方文档
    # train(mode=True)
        # Set the module in training mode.
        # This has an effect only on certain modules. See the documentation of particular modules for details of their behaviors in training/evaluation mode, i.e., whether they are affected, e.g. Dropout, BatchNorm, etc.
