In [1]:
# 导入模块
import torch
import torch.nn as nn
import argparse
import random
import os
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.metrics import accuracy_score

In [2]:
# Ask PyTorch to release any cached, currently unused CUDA memory before the run.
# NOTE(review): the recorded output of the next cell shows the "mps" device, so this
# CUDA-specific call presumably does nothing on this machine — confirm it is still wanted.
import torch
torch.cuda.empty_cache()

In [3]:
# Project root. The default keeps the original location; set the
# JUPYTER_PROJECT_DIR environment variable to run the notebook elsewhere
# without editing this cell.
BASE_DIR = os.environ.get("JUPYTER_PROJECT_DIR", "/Users/fcccasa/jupyter_project")


def _select_device():
    """Return the best available torch device: MPS first, then CUDA, then CPU."""
    # `torch.backends.mps` does not exist on older PyTorch builds, hence getattr.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


# Device used by every later cell for the model and the batches.
device = _select_device()
print(device)

mps


In [4]:
# Seed Python's global `random` module so that runs are repeatable.
random.seed(1)
# Label mapping keyed by the strings "1" and "100"; no other cell visible in this
# notebook references it.
# NOTE(review): looks like a leftover from a different (banknote) task — confirm before removing.
rmb_label = {"1": 0, "100": 1}

class CatDogDataset(Dataset):
    """Cat-vs-dog image dataset with a reproducible train/validation split.

    Every ``.jpg`` file directly inside ``data_dir`` is a sample. A file whose
    name starts with ``cat`` gets label 0; every other file gets label 1.
    """

    def __init__(self, data_dir, mode="train", split_n=0.9, rng_seed=620, transform=None):
        """
        :param data_dir: str, directory that contains the ``.jpg`` images
        :param mode: str, "train" for the first part of the split, "valid" for the rest
        :param split_n: float, fraction of the images assigned to the training split
        :param rng_seed: int, seed of the shuffle that precedes the split
        :param transform: callable or None, applied to the PIL image in ``__getitem__``
        :raises ValueError: if ``mode`` is neither "train" nor "valid"
        """
        if mode not in ("train", "valid"):
            raise ValueError("self.mode 无法识别，仅支持(train, valid)")
        self.mode = mode
        self.data_dir = data_dir
        # The same seed always produces the same shuffle, so the "train" and
        # "valid" instances built from one directory never overlap.
        self.rng_seed = rng_seed
        self.split_n = split_n
        # List of (image_path, label) tuples; indexed by __getitem__.
        self.data_info = self._get_img_info()
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``."""
        path_img, label = self.data_info[index]
        # Image.open is lazy; the context manager closes the file handle as soon
        # as the 3-channel RGB copy has been created.
        with Image.open(path_img) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        """Return the number of samples; raise if the split is empty."""
        if len(self.data_info) == 0:
            raise Exception("\ndata_dir:{} is a empty dir! Please checkout your path to images!".format(self.data_dir))
        return len(self.data_info)

    def _get_img_info(self):
        """Collect, shuffle and split the image files.

        :return: list of ``(image_path, label)`` tuples for the current mode
        """
        # os.listdir returns entries in arbitrary order, so sort before the
        # seeded shuffle; otherwise the split could differ between machines.
        img_names = sorted(n for n in os.listdir(self.data_dir) if n.endswith('.jpg'))

        # A private generator gives the same shuffle as seeding the global RNG
        # but does not disturb random state used elsewhere in the notebook.
        random.Random(self.rng_seed).shuffle(img_names)

        # 'cat*' -> 0, anything else -> 1
        img_labels = [0 if n.startswith('cat') else 1 for n in img_names]

        split_idx = int(len(img_labels) * self.split_n)

        if self.mode == "train":
            chosen = slice(None, split_idx)
        elif self.mode == "valid":
            chosen = slice(split_idx, None)
        else:
            # Reached only if self.mode was changed after construction.
            raise ValueError("self.mode 无法识别，仅支持(train, valid)")

        return [
            (os.path.join(self.data_dir, name), label)
            for name, label in zip(img_names[chosen], img_labels[chosen])
        ]

In [5]:
# Depthwise convolution: a grouped convolution with one filter group per input channel.
class dw_conv(nn.Module):
    """Depthwise 3x3 convolution followed by BatchNorm and ReLU.

    :param in_dim: number of input channels; also used as ``groups`` so each
        group sees exactly one input channel
    :param out_dim: number of output channels (must be a multiple of ``in_dim``
        because of the grouping)
    :param stride: stride of the 3x3 convolution
    :param padding: zero padding of the 3x3 convolution. The default 0 keeps the
        original behaviour (each call shrinks height/width); pass 1 to keep the
        spatial size at stride 1.
    """

    def __init__(self, in_dim, out_dim, stride, padding=0):
        super().__init__()
        # groups=in_dim is what makes this depthwise rather than a full convolution.
        self.dw_conv_k3 = nn.Conv2d(
            in_dim,
            out_dim,
            kernel_size=3,
            stride=stride,
            padding=padding,
            groups=in_dim,
            bias=False,  # BatchNorm right after supplies the shift term
        )
        self.bn = nn.BatchNorm2d(out_dim)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv -> BatchNorm -> ReLU to ``x`` and return the result."""
        x = self.dw_conv_k3(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

In [6]:
# Pointwise convolution: a 1x1 convolution that mixes channels.
class point_conv(nn.Module):
    """Pointwise (1x1) convolution followed by BatchNorm and ReLU.

    :param in_dim: number of input channels
    :param out_dim: number of output channels
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        # A 1x1 kernel leaves height/width untouched and only recombines channels.
        self.p_conv_k1 = nn.Conv2d(
            in_channels=in_dim,
            out_channels=out_dim,
            kernel_size=1,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(num_features=out_dim)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv -> BatchNorm -> ReLU to ``x`` and return the result."""
        return self.relu(self.bn(self.p_conv_k1(x)))

In [7]:
class MobileNets(nn.Module):
    """MobileNet-style classifier built from depthwise/pointwise convolution pairs.

    The feature extractor is a 3x3 stem convolution with ReLU, thirteen
    ``dw_conv`` + ``point_conv`` pairs, and a global average pool; a single
    linear layer maps the 1024 pooled features to ``num_classes`` scores.

    :param num_classes: size of the output layer
    :param large_img: if true, the stem and several depthwise stages use
        stride 2; otherwise every convolution uses stride 1

    NOTE(review): every 3x3 convolution here is unpadded, so the spatial size
    shrinks at each depthwise stage. With ``large_img=True`` a 224x224 input
    appears to reach 1x1 before the last two stride-2 stages — confirm the
    intended input size for that variant.
    """

    def __init__(self,num_classes,large_img):
        super(MobileNets,self).__init__()
        self.num_classes = num_classes

        # (input channels, output channels) of each depthwise/pointwise pair;
        # the plan is the same for both variants.
        channel_plan = (
            [(32, 64), (64, 128), (128, 128), (128, 256), (256, 256), (256, 512)]
            + [(512, 512)] * 5
            + [(512, 1024), (1024, 1024)]
        )

        # The two variants differ only in their strides.
        if large_img:
            stem_stride = 2
            dw_strides = [1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2]
        else:
            stem_stride = 1
            dw_strides = [1] * len(channel_plan)

        layers = [
            nn.Conv2d(3, 32, kernel_size=3, stride=stem_stride),
            nn.ReLU(inplace=True),
        ]
        for (c_in, c_out), dw_stride in zip(channel_plan, dw_strides):
            layers.append(dw_conv(c_in, c_in, dw_stride))
            layers.append(point_conv(c_in, c_out))
        # Adaptive pooling collapses whatever spatial size is left to 1x1.
        layers.append(nn.AdaptiveAvgPool2d(1))
        self.features = nn.Sequential(*layers)

        # Classification head over the 1024 pooled channels.
        self.fc = nn.Linear(1024,self.num_classes)

    def forward(self,x):
        """Return the class scores for an image batch ``x``."""
        pooled = self.features(x)
        flat = pooled.view(-1,1024)
        return self.fc(flat)

In [8]:
# Model factory
def get_model(vis_model=False, num_classes=10, large_img=False, input_size=(3, 224, 224)):
    """
    Build a freshly initialised MobileNets model and move it to ``device``.

    No pretrained weights are loaded; the parameters keep their default
    PyTorch initialisation.

    :param vis_model: bool, if true print a layer-by-layer summary (layer
        output shapes and parameter counts) using ``torchsummary``
    :param num_classes: int, size of the model's output layer (default 10)
    :param large_img: bool, forwarded to ``MobileNets`` to choose the strided
        variant (default False, the all-stride-1 variant)
    :param input_size: tuple (channels, height, width) used only for the
        summary printout
    :return: the MobileNets instance, located on the notebook-level ``device``
    """
    model = MobileNets(num_classes, large_img)

    if vis_model:
        # Imported lazily so torchsummary is only required when a summary is requested.
        from torchsummary import summary
        # The model has not been moved yet, so the summary runs on the CPU.
        summary(model, input_size=input_size, device="cpu")

    model.to(device)
    return model

In [9]:
# Training configuration
data_dir = os.path.join(BASE_DIR, "cifar10","cifar-10-batches-py")
num_classes = 10    # CIFAR-10 has 10 classes; matches the 10-way output layer built by get_model()

MAX_EPOCH = 3       # number of passes over the training set
BATCH_SIZE = 128    # samples per training batch; larger batches train faster but need more device memory
LR = 0.001          # initial learning rate of the optimizer
log_interval = 1    # print training statistics every `log_interval` iterations
val_interval = 1    # run validation every `val_interval` epochs to monitor over-fitting
classes = 10        # number of target classes (kept equal to num_classes)
start_epoch = -1    # training starts at epoch start_epoch + 1
lr_decay_step = 1   # the StepLR scheduler lowers the learning rate every `lr_decay_step` epochs

In [10]:
# Data preprocessing
# Per-channel mean / std used to normalise the image tensors.
# NOTE(review): these look like the usual ImageNet statistics rather than values
# computed on CIFAR-10 — confirm this is intended.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training augmentation: random 32x32 crop after 4-pixel padding, random
# horizontal flip, conversion to tensor, then normalisation.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# Shared normaliser applied to every validation crop inside the Lambda below.
normalizes = transforms.Normalize(norm_mean, norm_std)
valid_transform = transforms.Compose([
    transforms.Resize((32, 32)),                # resize the image to 32x32
    transforms.TenCrop(30, vertical_flip=False), # ten 30x30 crops: four corners + centre, plus their horizontal flips
    # Convert each crop to a normalised tensor and stack them, so one sample
    # becomes a single (ncrops, C, H, W) tensor.
    transforms.Lambda(lambda crops: torch.stack([normalizes(transforms.ToTensor()(crop)) for crop in crops])),
])

# Build the CIFAR-10 datasets; download=False means the files must already exist under root="cifar10".
train_data = datasets.CIFAR10(root="cifar10", train=True, download=False, transform=train_transform)
valid_data = datasets.CIFAR10(root="cifar10", train=False, download=False,transform=valid_transform)

# Build the DataLoaders
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_data, batch_size=32) # no shuffling for the validation set

In [11]:
# Model: build the network without printing a summary (vis_model=False).
Mobile_model = get_model(False)

# get_model() has already moved the model to `device`; as the last expression of
# the cell this call also makes the notebook display the module structure.
Mobile_model.to(device)

MobileNets(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): dw_conv(
      (dw_conv_k3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), groups=32, bias=False)
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (3): point_conv(
      (p_conv_k1): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (4): dw_conv(
      (dw_conv_k3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), groups=64, bias=False)
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (5): point_conv(
      (p_conv_k1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, af

In [12]:
# Loss function: cross-entropy between the model's class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

In [13]:
# Optimizer: SGD with momentum over all model parameters, starting at learning rate LR.
optimizer = optim.SGD(Mobile_model.parameters(), lr=LR, momentum=0.9)  # choose the optimizer
# Scheduler: multiply the learning rate by gamma=0.1 every `lr_decay_step` calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=lr_decay_step, gamma=0.1)  # learning-rate decay policy

In [None]:
# Loss histories used by the plotting cell:
#   train_curve - one entry per training iteration
#   valid_curve - one entry per validation pass (mean loss over the validation batches)
train_curve = list()
valid_curve = list()

for epoch in range(start_epoch + 1, MAX_EPOCH):

    loss_mean = 0.   # loss accumulated since the last log line
    correct = 0.     # correctly classified training samples so far in this epoch
    total = 0.       # training samples seen so far in this epoch

    Mobile_model.train()  # training mode: BatchNorm uses batch statistics
    for i, data in enumerate(train_loader):
        # forward
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = Mobile_model(inputs)

        # backward: clear stale gradients, then back-propagate the batch loss
        optimizer.zero_grad()
        loss = criterion(outputs, labels)
        loss.backward()

        # update weights
        optimizer.step()

        # Classification statistics: the predicted class is the index of the
        # largest score in each row. argmax needs neither `.data` nor a
        # CPU/NumPy round-trip.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Logging
        batch_loss = loss.item()
        loss_mean += batch_loss
        train_curve.append(batch_loss)
        if (i+1) % log_interval == 0:
            loss_mean = loss_mean / log_interval
            print("Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, i+1, len(train_loader), loss_mean, correct / total))
            loss_mean = 0.

    scheduler.step()  # advance the learning-rate schedule once per epoch

    # validate the model
    if (epoch+1) % val_interval == 0:

        correct_val = 0.
        total_val = 0.
        loss_val = 0.
        num_val_batches = len(valid_loader)
        Mobile_model.eval()  # evaluation mode: BatchNorm uses running statistics

        # No gradients are needed for evaluation; this saves memory and time.
        with torch.no_grad():
            for data in valid_loader:
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)

                # Each validation sample is a stack of crops:
                # (batch, ncrops, channels, height, width).
                bs, ncrops, c, h, w = inputs.size()
                # Fold the crops into the batch dimension for a single forward pass ...
                outputs = Mobile_model(inputs.view(-1, c, h, w))
                # ... then average the scores of the crops that belong to the same image.
                outputs_avg = outputs.view(bs, ncrops, -1).mean(1)

                loss = criterion(outputs_avg, labels)

                predicted = outputs_avg.argmax(dim=1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

                loss_val += loss.item()

            loss_val_mean = loss_val/num_val_batches
            valid_curve.append(loss_val_mean)
            # The batch count comes from the loader itself instead of a loop
            # variable that would be undefined for an empty loader.
            print("Valid:\t Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss: {:.4f} Acc:{:.2%}".format(
                epoch, MAX_EPOCH, num_val_batches, num_val_batches, loss_val_mean, correct_val / total_val))
        Mobile_model.train()

Training:Epoch[000/003] Iteration[001/391] Loss: 2.3497 Acc:10.94%
Training:Epoch[000/003] Iteration[002/391] Loss: 2.3552 Acc:8.59%
Training:Epoch[000/003] Iteration[003/391] Loss: 2.3360 Acc:9.38%
Training:Epoch[000/003] Iteration[004/391] Loss: 2.3380 Acc:9.18%
Training:Epoch[000/003] Iteration[005/391] Loss: 2.3394 Acc:9.38%
Training:Epoch[000/003] Iteration[006/391] Loss: 2.3118 Acc:9.90%
Training:Epoch[000/003] Iteration[007/391] Loss: 2.2991 Acc:9.71%
Training:Epoch[000/003] Iteration[008/391] Loss: 2.3146 Acc:9.77%
Training:Epoch[000/003] Iteration[009/391] Loss: 2.3247 Acc:9.64%
Training:Epoch[000/003] Iteration[010/391] Loss: 2.2963 Acc:10.00%
Training:Epoch[000/003] Iteration[011/391] Loss: 2.3186 Acc:10.01%
Training:Epoch[000/003] Iteration[012/391] Loss: 2.2945 Acc:9.96%
Training:Epoch[000/003] Iteration[013/391] Loss: 2.2918 Acc:9.98%
Training:Epoch[000/003] Iteration[014/391] Loss: 2.3022 Acc:9.88%
Training:Epoch[000/003] Iteration[015/391] Loss: 2.3095 Acc:9.95%
Trainin

In [None]:
# `plt` is not imported by any earlier cell, so bring it into scope here;
# without this the cell fails with NameError on a fresh kernel.
import matplotlib.pyplot as plt

# Training loss is recorded once per iteration.
train_x = range(len(train_curve))
train_y = train_curve

# Validation loss is recorded once per validation pass, so convert each record
# index to the iteration count at which it was measured.
train_iters = len(train_loader)
valid_x = np.arange(1, len(valid_curve)+1) * train_iters*val_interval
valid_y = valid_curve

plt.plot(train_x, train_y, label='Train')
plt.plot(valid_x, valid_y, label='Valid')

plt.legend(loc='upper right')
plt.ylabel('loss value')
plt.xlabel('Iteration')
plt.show()