# 迁移学习微调训练图像分类模型

在自己的图像分类数据集上，使用ImageNet预训练图像分类模型初始化，改动分类层，迁移学习微调训练

同济子豪兄：https://space.bilibili.com/1900783

[代码运行云GPU环境](https://featurize.cn/?s=d7ce99f842414bfcaea5662a97581bd1)：GPU RTX 3060、CUDA v11.2

## 导入工具包

In [57]:
import time
import os
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Adam


import matplotlib.pyplot as plt
%matplotlib inline

# 忽略烦人的红色提示
import warnings
warnings.filterwarnings("ignore")

# 获取计算硬件
# 有 GPU 就用 GPU，没有就用 CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('device', device)

device cuda:0


## 图像预处理

In [58]:
from torchvision import transforms

# 训练集图像预处理：缩放裁剪、图像增强、转 Tensor、归一化
train_transform = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                     ])

# 测试集图像预处理-RCTN：缩放、裁剪、转 Tensor、归一化
test_transform = transforms.Compose([transforms.Resize(256),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize(
                                         mean=[0.485, 0.456, 0.406], 
                                         std=[0.229, 0.224, 0.225])
                                    ])

## 载入图像分类数据集

In [59]:
# 数据集文件夹路径
dataset_dir = r'E:\MV-Code\Datasets\archive\SR-Base_datasets'

In [60]:
train_path = os.path.join(dataset_dir, 'train')
test_path = os.path.join(dataset_dir, 'val')
print('训练集路径', train_path)
print('测试集路径', test_path)

from torchvision import datasets
# 载入训练集
train_dataset = datasets.ImageFolder(train_path, train_transform)
# 载入测试集
test_dataset = datasets.ImageFolder(test_path, test_transform)

print('训练集图像数量', len(train_dataset))
print('类别个数', len(train_dataset.classes))
print('各类别名称', train_dataset.classes)
print('测试集图像数量', len(test_dataset))
print('类别个数', len(test_dataset.classes))
print('各类别名称', test_dataset.classes)

训练集路径 E:\MV-Code\Datasets\archive\SR-Base_datasets\train
测试集路径 E:\MV-Code\Datasets\archive\SR-Base_datasets\val
训练集图像数量 19290
类别个数 2
各类别名称 ['parasitized', 'uninfected']
测试集图像数量 4134
类别个数 2
各类别名称 ['parasitized', 'uninfected']


## 类别和索引号 映射字典

In [61]:
# 各类别名称
class_names = train_dataset.classes
n_class = len(class_names)
# 映射关系：类别 到 索引号
train_dataset.class_to_idx
# 映射关系：索引号 到 类别
idx_to_labels = {y:x for x,y in train_dataset.class_to_idx.items()}

In [62]:
idx_to_labels

{0: 'parasitized', 1: 'uninfected'}

In [63]:
# 保存为本地的 npy 文件
np.save('idx_to_labels.npy', idx_to_labels)
np.save('labels_to_idx.npy', train_dataset.class_to_idx)

## 定义数据加载器DataLoader

In [64]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# 训练集的数据加载器
train_loader = DataLoader(train_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=4
                         )

# 测试集的数据加载器
test_loader = DataLoader(test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False,
                         num_workers=4
                        )

## 导入训练需使用的工具包

In [65]:
from torchvision import models
import torch.optim as optim
from torch.optim import lr_scheduler

## 选择迁移学习训练方式

斯坦福CS231N【迁移学习】中文精讲：https://www.bilibili.com/video/BV1K7411W7So

斯坦福CS231N【迁移学习】官方笔记：https://cs231n.github.io/transfer-learning

如果你的数据集和MS COCO数据集的图像域**类似**（街景、动植物、生活用品），可以保留预训练模型权重，在自己的数据集上迁移学习微调分类输出层或所有层。站在巨人的肩膀上，复用预训练模型在MS COCO数据集上学习到的图像特征。（Transfer Learning, Fine Tuning）

如果你的数据集和MS COCO数据集的图像域**不类似**（医疗影像、显微镜图像、工业检测、天文照片、动画、油画），可以随机初始化模型权重，在自己的数据集上重新训练所有层。（From Scratch）。或者冻结底层权重，只重新训练顶层，复用预训练模型在MS COCO数据集上学习到的底层图像特征。

### 选择一：只微调训练模型最后一层（全连接分类层）

In [10]:
model = models.resnet18(pretrained=True) # 载入预训练模型

# 修改全连接层，使得全连接层的输出与当前数据集类别数对应
# 新建的层默认 requires_grad=True
model.fc = nn.Linear(model.fc.in_features, n_class)

In [11]:
model.fc

Linear(in_features=512, out_features=2, bias=True)

In [12]:
# 只微调训练最后一层全连接层的参数，其它层冻结
optimizer = optim.Adam(model.fc.parameters())

### 选择二：微调训练所有层

In [None]:
# model = models.resnet18(pretrained=True) # 载入预训练模型

# model.fc = nn.Linear(model.fc.in_features, n_class)

# optimizer = optim.Adam(model.parameters())

### 选择三：随机初始化模型全部权重，从头训练所有层

In [None]:
# model = models.resnet18(pretrained=False) # 只载入模型结构，不载入预训练权重参数

# model.fc = nn.Linear(model.fc.in_features, n_class)

# optimizer = optim.Adam(model.parameters())

In [72]:
# 设置模型路径
model_path = r'E:\MV-Code-202018010103-Lucy\Model\full_model.pth'

# 加载模型
model = torch.load(model_path)


AttributeError: 'ConvertModel' object has no attribute 'fc'

In [12]:
class SEBlock(nn.Module):
    def __init__(self, channel, reduction=8):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)


In [13]:
class CNN1(nn.Module):
    def __init__(self):
        super(CNN1, self).__init__()
        
        # Block 1
        self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1_1 = nn.BatchNorm2d(32)
        
        # Block 2
        self.conv2_1 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2_1 = nn.BatchNorm2d(64)
        
        # Block 3
        self.conv3_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3_1 = nn.BatchNorm2d(128)
        
        # Block 4
        self.conv4_1 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bn4_1 = nn.BatchNorm2d(128)
        
        # SE Block
        self.se1 = SEBlock(channel=128)

    def forward(self, x):
        # Block 1
        x = F.relu(self.bn1_1(self.conv1_1(x)))
        print(f"After block 1: {x.shape}") # 添加这一行
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        
        # Block 2
        x = F.relu(self.bn2_1(self.conv2_1(x)))
        print(f"After block 2: {x.shape}") # 添加这一行
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        
        # Block 3
        x = F.relu(self.bn3_1(self.conv3_1(x)))
        
        # Block 4
        x = F.relu(self.bn4_1(self.conv4_1(x)))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        
        # SE Block
        x = self.se1(x)
        
        return x


In [14]:
class CNN2(nn.Module):
    def __init__(self):
        super(CNN2, self).__init__()
        
        # Parallel Blocks, assuming Block1, Block2, and Block3 are similar
        self.conv_blocks = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(in_channels=3, out_channels=16, kernel_size=kernel_size, padding=kernel_size//2),
                nn.BatchNorm2d(16),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                SEBlock(channel=16)
            ) for kernel_size in [3, 5, 7]
        ])
        
        # After concatenation
        self.conv1_2 = nn.Conv2d(in_channels=48, out_channels=128, kernel_size=3, padding=1)
        self.bn1_2 = nn.BatchNorm2d(128)
        self.pool1_2 = nn.MaxPool2d(kernel_size=4, stride=4)

    def forward(self, x):
        # Parallel Blocks
        outputs = [block(x) for block in self.conv_blocks]
        
        # Concatenation
        x = torch.cat(outputs, dim=1)
        
        # After concatenation
        x = F.relu(self.bn1_2(self.conv1_2(x)))
        x = self.pool1_2(x)
        
        return x


In [23]:
class CombinedModel(nn.Module):
    def __init__(self, input_size=(256, 256, 3)):
        super(CombinedModel, self).__init__()
        self.cnn1 = CNN1()
        self.cnn2 = CNN2()

        # 创建一个临时输入,并调整通道数的位置
        tmp_input = torch.randn(1, 3, *input_size[:2])
        cnn1_output = self.cnn1(tmp_input)
        cnn2_output = self.cnn2(tmp_input)

        # 获取两个CNN网络输出的拉平后的维度
        cnn1_output_size = cnn1_output.view(cnn1_output.size(0), -1).size(1)
        cnn2_output_size = cnn2_output.view(cnn2_output.size(0), -1).size(1)
        total_output_size = cnn1_output_size + cnn2_output_size

        self.fc = nn.Linear(total_output_size, 1)  # 用于二分类

    def forward(self, x):
      if x.shape[1] != 3:
          x = x.permute(0, 3, 1, 2)
    
      x1 = self.cnn1(x)
      x2 = self.cnn2(x)
    
      print(f"x1 shape: {x1.shape}") 
      print(f"x2 shape: {x2.shape}")

    # Flatten the features
      x1 = x1.view(x1.size(0), -1)
      x2 = x2.view(x2.size(0), -1)

    # Concatenate features from both CNNs
      x = torch.cat((x1, x2), dim=1)

    # Pass through the final fully connected layer
      x = self.fc(x)

    # Adjust the output shape to match the target
      x = x.squeeze(1)

    # Convert output to Long data type
      x = x.long()

      return x

## 训练配置

In [74]:
# 注意，这里使用的损失函数是nn.BCEWithLogitsLoss
# 它是sigmoid激活和二进制交叉熵损失的组合，适用于二分类任务
criterion = nn.BCEWithLogitsLoss()

model = model(input_size=(256,256,3)).to(device)
optimizer = Adam(model.parameters(), lr=0.001)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# 训练轮次 Epoch
EPOCHS = 30

# ...这里添加训练循环代码...


TypeError: forward() got an unexpected keyword argument 'input_size'

SyntaxError: invalid syntax (1908337407.py, line 2)

## 函数：在训练集上训练

In [42]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

In [47]:
def train_one_batch(images, labels):
    '''
    运行一个 batch 的训练，返回当前 batch 的训练日志
    '''
    
    # 获得一个 batch 的数据和标注
    images = images.to(device)
    labels = labels.to(device)
    
    outputs = model(images) # 输入模型，执行前向预测
    outputs = outputs.squeeze(-1)  # 将输出形状调整为(batch_size,)
    outputs = outputs.float()  # 确保输出是浮点类型
    labels = labels.float()  # 将标签转换为浮点型
    loss = criterion(outputs, labels)  # 计算当前 batch 中的损失函数值

    
    # 优化更新权重
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # 获取当前 batch 的标签类别和预测类别
    _, preds = torch.max(outputs, 1) # 获得当前 batch 所有图像的预测类别
    preds = preds.cpu().numpy()
    loss = loss.detach().cpu().numpy()
    outputs = outputs.detach().cpu().numpy()
    labels = labels.detach().cpu().numpy()
    
    log_train = {}
    log_train['epoch'] = epoch
    log_train['batch'] = batch_idx
    # 计算分类评估指标
    log_train['train_loss'] = loss
    log_train['train_accuracy'] = accuracy_score(labels, preds)
    # log_train['train_precision'] = precision_score(labels, preds, average='macro')
    # log_train['train_recall'] = recall_score(labels, preds, average='macro')
    # log_train['train_f1-score'] = f1_score(labels, preds, average='macro')
    
    return log_train

## 函数：在整个测试集上评估

In [48]:
def evaluate_testset():
    '''
    在整个测试集上评估，返回分类评估指标日志
    '''

    loss_list = []
    labels_list = []
    preds_list = []
    
    with torch.no_grad():
        for images, labels in test_loader: # 生成一个 batch 的数据和标注
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images) # 输入模型，执行前向预测

            # 获取整个测试集的标签类别和预测类别
            _, preds = torch.max(outputs, 1) # 获得当前 batch 所有图像的预测类别
            preds = preds.cpu().numpy()
            loss = criterion(outputs, labels) # 由 logit，计算当前 batch 中，每个样本的平均交叉熵损失函数值
            loss = loss.detach().cpu().numpy()
            outputs = outputs.detach().cpu().numpy()
            labels = labels.detach().cpu().numpy()

            loss_list.append(loss)
            labels_list.extend(labels)
            preds_list.extend(preds)
        
    log_test = {}
    log_test['epoch'] = epoch
    
    # 计算分类评估指标
    log_test['test_loss'] = np.mean(loss_list)
    log_test['test_accuracy'] = accuracy_score(labels_list, preds_list)
    log_test['test_precision'] = precision_score(labels_list, preds_list, average='macro')
    log_test['test_recall'] = recall_score(labels_list, preds_list, average='macro')
    log_test['test_f1-score'] = f1_score(labels_list, preds_list, average='macro')
    
    return log_test

## 训练开始之前，记录日志

In [49]:
epoch = 0
batch_idx = 0
best_test_accuracy = 0

In [50]:
# 训练日志-训练集
# 初始化空的 DataFrame

df_train_log = pd.DataFrame()

# 创建和更新 log_train 字典
log_train = {'epoch': 0, 'batch': 0}
images, labels = next(iter(train_loader))
log_train.update(train_one_batch(images, labels))

# 将 log_train 字典转换为 DataFrame
log_train_df = pd.DataFrame([log_train])

# 使用 pd.concat 合并 DataFrame
df_train_log = pd.concat([df_train_log, log_train_df], ignore_index=True)

After block 1: torch.Size([32, 32, 256, 256])
After block 2: torch.Size([32, 64, 128, 128])
x1 shape: torch.Size([32, 128, 32, 32])
x2 shape: torch.Size([32, 128, 32, 32])


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [None]:
df_train_log

In [None]:
# 训练日志-测试集
# 初始化空的 DataFrame
df_test_log = pd.DataFrame()

# 创建和更新 log_test 字典
log_test = {'epoch': 0}
log_test.update(evaluate_testset())

# 将 log_test 字典转换为 DataFrame
log_test_df = pd.DataFrame([log_test])

# 使用 pd.concat 合并 DataFrame
df_test_log = pd.concat([df_test_log, log_test_df], ignore_index=True)

In [None]:
df_test_log

## 登录wandb

1.安装 wandb：pip install wandb

2.登录 wandb：在命令行中运行wandb login

3.按提示复制粘贴API Key至命令行中

## 创建wandb可视化项目

In [75]:
import wandb

wandb.init(project='fruit30', name=time.strftime('%m%d%H%M%S'))

wandb: Currently logged in as: yungleanalwaysright (luciferinc). Use `wandb login --relogin` to force relogin
Traceback (most recent call last):
  File "D:\Software\anaconda\envs\Pytorch\lib\site-packages\wandb\sdk\wandb_init.py", line 1177, in init
    wi.setup(kwargs)
  File "D:\Software\anaconda\envs\Pytorch\lib\site-packages\wandb\sdk\wandb_init.py", line 331, in setup
    self._log_setup(settings)
  File "D:\Software\anaconda\envs\Pytorch\lib\site-packages\wandb\sdk\wandb_init.py", line 488, in _log_setup
    filesystem.mkdir_exists_ok(os.path.dirname(settings.log_user))
  File "D:\Software\anaconda\envs\Pytorch\lib\site-packages\wandb\sdk\lib\filesystem.py", line 30, in mkdir_exists_ok
    os.makedirs(dir_name, exist_ok=True)
  File "D:\Software\anaconda\envs\Pytorch\lib\os.py", line 215, in makedirs
    makedirs(head, exist_ok=exist_ok)
  File "D:\Software\anaconda\envs\Pytorch\lib\os.py", line 225, in makedirs
    mkdir(name, mode)
OSError: [WinError 112] There is not enough sp

Error: An unexpected error occurred

## 运行训练

In [None]:
# 初始化 DataFrames
df_train_log = pd.DataFrame()
df_test_log = pd.DataFrame()

for epoch in range(1, EPOCHS+1):
    print(f'Epoch {epoch}/{EPOCHS}')
    
    # 训练阶段
    model.train()
    batch_idx = 0
    for images, labels in tqdm(train_loader): # 获得一个 batch 的数据和标注
        batch_idx += 1
        log_train = train_one_batch(images, labels)
        
        # 使用 pd.concat 而不是 append
        log_train_df = pd.DataFrame([log_train])
        df_train_log = pd.concat([df_train_log, log_train_df], ignore_index=True)

        wandb.log(log_train)
        
    lr_scheduler.step()

    # 测试阶段
    model.eval()
    log_test = evaluate_testset()
    
    # 使用 pd.concat 而不是 append
    log_test_df = pd.DataFrame([log_test])
    df_test_log = pd.concat([df_test_log, log_test_df], ignore_index=True)

    wandb.log(log_test)
    
    # 保存最新的最佳模型文件
    if log_test['test_accuracy'] > best_test_accuracy:
        # 删除旧的最佳模型文件(如有)
        old_best_checkpoint_path = f'checkpoint/best-{best_test_accuracy:.3f}.pth'
        if os.path.exists(old_best_checkpoint_path):
            os.remove(old_best_checkpoint_path)
        # 保存新的最佳模型文件
        best_test_accuracy = log_test['test_accuracy']
        new_best_checkpoint_path = f'checkpoint/best-{best_test_accuracy:.3f}.pth'
        torch.save(model, new_best_checkpoint_path)
        print(f'保存新的最佳模型 checkpoint/best-{best_test_accuracy:.3f}.pth')

# 将日志保存为 CSV 文件
df_train_log.to_csv('训练日志-训练集.csv', index=False)
df_test_log.to_csv('训练日志-测试集.csv', index=False)

## 在测试集上评价

In [None]:
# 载入最佳模型作为当前模型
model = torch.load('checkpoint/best-{:.3f}.pth'.format(best_test_accuracy))

In [None]:
model.eval()
print(evaluate_testset())

## 参考文档

https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

https://www.bilibili.com/video/BV14J411X7Bb

https://www.bilibili.com/video/BV1w4411u7ay