In [2]:
import os
import mnist_loader as ml
import torch.nn as nn
import torch
import math
from torch.utils.data import DataLoader
from torch.autograd import Variable
from torchvision import transforms

In [3]:
class NetSimple(nn.Module):
    """Minimal CNN classifier: 3x3 convolution -> ReLU -> 2x2 max-pool -> linear head.

    The linear layer expects 16 feature maps of 14x14, which is what a
    3-channel 28x28 input yields after the size-preserving convolution
    (kernel 3, stride 1, padding 1) followed by the 2x2 pooling.
    """

    def __init__(self, c):
        """Create the layers; ``c`` is the number of output classes."""
        super().__init__()
        self.conv = nn.Conv2d(in_channels=3, out_channels=16,
                              kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2)
        self.linear = nn.Linear(in_features=14 * 14 * 16, out_features=c)

    def forward(self, x):
        """Return the class scores, shape ``(batch, c)``, for the batch ``x``."""
        features = self.pool(self.relu(self.conv(x)))
        # Collapse everything except the batch dimension before the linear head.
        flattened = features.view(features.size(0), -1)
        return self.linear(flattened)

In [None]:
def train(dp, bs, epc, cuda):
    """Train ``NetSimple`` on the images under ``dp`` and validate after every epoch.

    Args:
        dp: Image directory handed to ``ml.image_list``.
        bs: Mini-batch size used by both the training and validation loaders.
        epc: Number of epochs to run.
        cuda: Truthy to run on the GPU, falsy to stay on the CPU.

    Side effects:
        * Creates ``./output`` and rewrites ``output/total.txt``,
          ``output/train.txt`` and ``output/val.txt`` on every call.
        * Prints a model summary, per-batch and per-epoch training metrics and
          per-epoch validation metrics.
        * Saves ``output/params_<epoch>.pth`` (a ``state_dict``) every 10 epochs.
    """
    os.makedirs('./output', exist_ok=True)

    # The image list is rebuilt and re-split on every call (the original
    # "only if total.txt is missing" check was deliberately disabled).
    # NOTE(review): if shuffle_split is random, resuming from a checkpoint will
    # see a different train/val partition -- confirm before comparing runs.
    ml.image_list(dp, 'output/total.txt')
    ml.shuffle_split('output/total.txt', 'output/train.txt', 'output/val.txt')

    # ToTensor converts each loaded image into a PyTorch tensor.
    train_data = ml.MyDataset(txt='output/train.txt', transform=transforms.ToTensor())
    val_data = ml.MyDataset(txt='output/val.txt', transform=transforms.ToTensor())
    # Only the training loader shuffles; validation order does not matter.
    train_loader = DataLoader(dataset=train_data, batch_size=bs, shuffle=True)
    val_loader = DataLoader(dataset=val_data, batch_size=bs)

    device = torch.device('cuda' if cuda else 'cpu')

    # Other backbones (e.g. torchvision's vgg16 / resnet18 with num_classes=10)
    # can be swapped in here. To resume from a checkpoint:
    #   model.load_state_dict(torch.load('./output/params_10.pth'))
    model = NetSimple(10)
    # Move the model BEFORE summarising: torchsummary builds its probe input on
    # the device it is told about (default "cuda"), so model and probe must agree.
    model.to(device)

    from torchsummary import summary
    # (3, 28, 28) = RGB channels, height, width.
    summary(model, (3, 28, 28), device=device.type)

    if cuda:
        print('training with cuda')

    # Adam with L2 weight decay; the learning rate drops to 10% at epochs 20 and 30.
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-3)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [20, 30], 0.1)
    loss_func = nn.CrossEntropyLoss()

    # Denominators for the per-epoch averages. max(..., 1) keeps an empty split
    # from raising ZeroDivisionError; non-empty splits are unaffected.
    train_batches = math.ceil(len(train_data) / bs)
    val_batches = math.ceil(len(val_data) / bs)
    train_batches_safe = max(train_batches, 1)
    val_batches_safe = max(val_batches, 1)
    train_samples_safe = max(len(train_data), 1)
    val_samples_safe = max(len(val_data), 1)

    for epoch in range(epc):
        # training-----------------------------------
        model.train()
        train_loss = 0
        train_acc = 0
        for batch, (batch_data, batch_label) in enumerate(train_loader):
            # Each iteration yields one batch of images with their labels.
            batch_data = batch_data.to(device)
            batch_label = batch_label.to(device)

            out = model(batch_data)  # e.g. 256x3x28x28 in -> 256x10 out
            loss = loss_func(out, batch_label)
            train_loss += loss.item()

            # Predicted class = index of the largest score per sample.
            pred = torch.max(out, 1)[1]
            train_correct = (pred == batch_label).sum()
            train_acc += train_correct.item()

            # The batch index printed here is 0-based.
            print('epoch: %2d/%d batch %3d/%d  Train Loss: %.3f, Acc: %.3f'
                  % (epoch + 1, epc, batch, train_batches,
                     loss.item(), train_correct.item() / len(batch_data)))

            # Clear stale gradients, back-propagate, then update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        scheduler.step()  # advance the learning-rate schedule once per epoch
        print('Train Loss: %.6f, Acc: %.3f'
              % (train_loss / train_batches_safe, train_acc / train_samples_safe))

        # evaluation--------------------------------
        model.eval()
        eval_loss = 0
        eval_acc = 0
        # No gradients are needed for validation; skipping autograd bookkeeping
        # saves memory and time without changing the computed metrics.
        with torch.no_grad():
            for batch_data, batch_label in val_loader:
                batch_data = batch_data.to(device)
                batch_label = batch_label.to(device)

                out = model(batch_data)
                loss = loss_func(out, batch_label)
                eval_loss += loss.item()
                pred = torch.max(out, 1)[1]
                num_correct = (pred == batch_label).sum()
                eval_acc += num_correct.item()
        print('Val Loss: %.6f, Acc: %.3f'
              % (eval_loss / val_batches_safe, eval_acc / val_samples_safe))

        # Checkpoint the parameters every 10 epochs (adjust the interval here)
        # so training can be resumed or the weights reused later.
        if (epoch + 1) % 10 == 0:
            torch.save(model.state_dict(), 'output/params_' + str(epoch + 1) + '.pth')

In [5]:
# Training configuration.
# The dataset directory can be overridden with the MNIST_DATA_DIR environment
# variable so the notebook runs on other machines; when it is unset, the
# original local path is used.
datapath = os.environ.get(
    'MNIST_DATA_DIR',
    '/Users/sunyuliang/Desktop/CV/Python/n_imgs/train_images',
)
batch_size = 256
epochs = 300
use_cuda = torch.cuda.is_available()

# Run training with the settings above.
train(datapath, batch_size, epochs, use_cuda)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 28, 28]             448
              ReLU-2           [-1, 16, 28, 28]               0
         MaxPool2d-3           [-1, 16, 14, 14]               0
            Linear-4                   [-1, 10]          31,370
Total params: 31,818
Trainable params: 31,818
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.22
Params size (MB): 0.12
Estimated Total Size (MB): 0.35
----------------------------------------------------------------
epoch:  1/300 batch   0/188  Train Loss: 2.311, Acc: 0.102
epoch:  1/300 batch   1/188  Train Loss: 3.386, Acc: 0.090
epoch:  1/300 batch   2/188  Train Loss: 1.835, Acc: 0.375
epoch:  1/300 batch   3/188  Train Loss: 1.800, Acc: 0.527
epoch:  1/300 batch   4/188  Train Loss: 1.681, Acc: 0.543
epoch