## 导入工具包

In [49]:
import time
import os
from tqdm import tqdm

import pandas as pd
import numpy as np

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

import matplotlib.pyplot as plt
%matplotlib inline

# Silence all Python warnings for the rest of the notebook session
import warnings
warnings.filterwarnings("ignore")

# Select the compute device: the first CUDA GPU when one is available,
# otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('device', device)

device cuda:0


## 图像预处理

In [50]:
from torchvision import transforms

# Training pipeline: random resized crop to 224, random horizontal flip,
# conversion to a tensor, then per-channel normalisation
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Test pipeline (no augmentation): resize to 256, centre crop to 224,
# conversion to a tensor, then the same per-channel normalisation
test_transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

## 载入图像分类数据集

In [51]:
# Root folder of the dataset; the 'train' and 'val' sub-folders hold the two splits
dataset_dir = 'data'
train_path = os.path.join(dataset_dir, 'train')
test_path = os.path.join(dataset_dir, 'val')
print('训练集路径', train_path)
print('测试集路径', test_path)

from torchvision import datasets
# Build one ImageFolder dataset per split, each with its own preprocessing pipeline
train_dataset = datasets.ImageFolder(train_path, train_transform)
test_dataset = datasets.ImageFolder(test_path, test_transform)

# Print image count, class count and class names for the training split, then the test split
for _count_label, _split in (('训练集图像数量', train_dataset),
                             ('测试集图像数量', test_dataset)):
    print(_count_label, len(_split))
    print('类别个数', len(_split.classes))
    print('各类别名称', _split.classes)

训练集路径 data\train
测试集路径 data\val
训练集图像数量 2360
类别个数 24
各类别名称 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24']
测试集图像数量 590
类别个数 24
各类别名称 ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24']


## 类别和索引号 映射字典

In [52]:
# Class names as discovered by ImageFolder, and how many there are
class_names = train_dataset.classes
n_class = len(class_names)
# train_dataset.class_to_idx maps class name -> index; invert it to get
# the index -> class name mapping
idx_to_labels = {index: name for name, index in train_dataset.class_to_idx.items()}

In [53]:
# Display the index -> class name mapping
idx_to_labels

{0: '01',
 1: '02',
 2: '03',
 3: '04',
 4: '05',
 5: '06',
 6: '07',
 7: '08',
 8: '09',
 9: '10',
 10: '11',
 11: '12',
 12: '13',
 13: '14',
 14: '15',
 15: '16',
 16: '17',
 17: '18',
 18: '19',
 19: '20',
 20: '21',
 21: '22',
 22: '23',
 23: '24'}

## 定义数据加载器DataLoader

In [54]:
from torch.utils.data import DataLoader

# Number of images per batch for both loaders
BATCH_SIZE = 32

# Training loader: shuffled, loaded by 4 worker processes
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

# Test loader: fixed order, loaded by 4 worker processes
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

## 导入训练需使用的工具包

In [55]:
from torchvision import models
import torch.optim as optim
from torch.optim import lr_scheduler

## 选择迁移学习训练方式

斯坦福CS231N【迁移学习】中文精讲：https://www.bilibili.com/video/BV1K7411W7So

斯坦福CS231N【迁移学习】官方笔记：https://cs231n.github.io/transfer-learning

如果你的数据集和ImageNet数据集的图像域**类似**（街景、动植物、生活用品），可以保留预训练模型权重，在自己的数据集上迁移学习微调分类输出层或所有层。站在巨人的肩膀上，复用预训练模型在ImageNet数据集上学习到的图像特征。（Transfer Learning, Fine Tuning）

如果你的数据集和ImageNet数据集的图像域**不类似**（医疗影像、显微镜图像、工业检测、天文照片、动画、油画），可以随机初始化模型权重，在自己的数据集上重新训练所有层。（From Scratch）。或者冻结底层权重，只重新训练顶层，复用预训练模型在ImageNet数据集上学习到的底层图像特征。

### 微调训练所有层

In [56]:
# Load ResNet-18 with pretrained weights
model = models.resnet18(pretrained=True)

# Replace the final fully connected layer so it outputs one logit per class
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=n_class)

In [57]:
# Display the replaced classification head
model.fc

Linear(in_features=512, out_features=24, bias=True)

In [58]:
# Adam over every parameter of the model (all layers are fine-tuned), default hyperparameters
optimizer = optim.Adam(params=model.parameters())

## 训练配置

In [59]:
# Move the model to the selected compute device
model = model.to(device)

# Cross-entropy loss (default reduction: mean over the batch)
criterion = nn.CrossEntropyLoss()

# Number of training epochs
EPOCHS = 30

# Learning-rate schedule: multiply the learning rate by 0.5 every 5 epochs.
# The class is resolved through `optim.lr_scheduler` instead of the bare
# `lr_scheduler` module name, because this assignment rebinds `lr_scheduler`
# to the scheduler instance; going through `optim` keeps the cell re-runnable.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

## 函数：在训练集上训练

In [60]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score

In [61]:
def train_one_batch(images, labels):
    '''
    Run one optimisation step on a single batch and return its training log.

    Relies on notebook-level globals: ``model``, ``criterion``, ``optimizer``,
    ``device``, ``epoch`` and ``batch_idx``.

    Args:
        images: batch of inputs fed to ``model``.
        labels: targets for the batch, passed to ``criterion`` and compared
            against the arg-max predictions.

    Returns:
        dict with keys 'epoch', 'batch', 'train_loss' (0-d numpy array) and
        'train_accuracy'.
    '''

    # Move the batch to the same device as the model
    images = images.to(device)
    labels = labels.to(device)

    outputs = model(images)            # forward pass
    loss = criterion(outputs, labels)  # loss for this batch

    # Back-propagate and update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Predicted class = index of the largest output along dim 1
    _, preds = torch.max(outputs, 1)

    # Only the loss, labels and predictions are copied back to the host;
    # the raw outputs are not needed for the log.
    # Per-batch precision / recall / F1 were disabled in the original and
    # remain so here.
    return {
        'epoch': epoch,
        'batch': batch_idx,
        'train_loss': loss.detach().cpu().numpy(),
        'train_accuracy': accuracy_score(labels.detach().cpu().numpy(),
                                         preds.cpu().numpy()),
    }

## 函数：在整个测试集上评估

In [62]:
def evaluate_testset():
    '''
    Evaluate the current model on the whole test set and return a metrics log.

    Relies on notebook-level globals: ``model``, ``criterion``, ``test_loader``,
    ``device`` and ``epoch``. The caller is responsible for putting the model
    into evaluation mode (``model.eval()``) beforehand.

    Returns:
        dict with keys 'epoch', 'test_loss', 'test_accuracy',
        'test_precision', 'test_recall' and 'test_f1-score'
        (precision / recall / F1 are macro-averaged).
    '''

    loss_sum = 0.0   # sum of per-sample losses over the whole test set
    n_samples = 0
    labels_list = []
    preds_list = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)  # forward pass

            # Predicted class = index of the largest output along dim 1
            _, preds = torch.max(outputs, 1)

            # `criterion` returns the mean loss of the batch; scale it back by
            # the batch size so a smaller final batch is not over-weighted in
            # the overall average.
            loss = criterion(outputs, labels)
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            n_samples += batch_size

            labels_list.extend(labels.cpu().numpy())
            preds_list.extend(preds.cpu().numpy())

    log_test = {}
    log_test['epoch'] = epoch

    # Classification metrics over the full test set
    log_test['test_loss'] = loss_sum / n_samples if n_samples else float('nan')
    log_test['test_accuracy'] = accuracy_score(labels_list, preds_list)
    log_test['test_precision'] = precision_score(labels_list, preds_list, average='macro')
    log_test['test_recall'] = recall_score(labels_list, preds_list, average='macro')
    log_test['test_f1-score'] = f1_score(labels_list, preds_list, average='macro')

    return log_test

## 训练开始之前，记录日志

In [63]:
# Reset the counters read by the logging helpers and the best test accuracy seen so far
epoch, batch_idx, best_test_accuracy = 0, 0, 0

## 运行训练

In [None]:
# Make sure the checkpoint folder exists; torch.save does not create
# missing directories and would otherwise fail on the first save.
os.makedirs('data/checkpoint', exist_ok=True)

for epoch in range(1, EPOCHS+1):

    print(f'Epoch {epoch}/{EPOCHS}')

    ## Training phase
    model.train()
    for images, labels in tqdm(train_loader):  # one batch of images and labels
        batch_idx += 1
        log_train = train_one_batch(images, labels)
    # Advance the learning-rate schedule once per epoch
    lr_scheduler.step()

    ## Evaluation phase
    model.eval()
    log_test = evaluate_testset()

    # Keep only the checkpoint with the best test accuracy so far
    if log_test['test_accuracy'] > best_test_accuracy:
        # Delete the previous best checkpoint, if there is one
        old_best_checkpoint_path = 'data/checkpoint/best-{:.3f}.pth'.format(best_test_accuracy)
        if os.path.exists(old_best_checkpoint_path):
            os.remove(old_best_checkpoint_path)
        # Save the new best model; the whole model object is saved because
        # the notebook later reloads it directly with torch.load
        best_test_accuracy = log_test['test_accuracy']
        new_best_checkpoint_path = 'data/checkpoint/best-{:.3f}.pth'.format(best_test_accuracy)
        torch.save(model, new_best_checkpoint_path)
        print('保存新的最佳模型', new_best_checkpoint_path)

Epoch 1/30


100%|██████████| 74/74 [00:39<00:00,  1.86it/s]


保存新的最佳模型 data/checkpoint/best-0.195.pth
Epoch 2/30


100%|██████████| 74/74 [00:36<00:00,  2.02it/s]


保存新的最佳模型 data/checkpoint/best-0.280.pth
Epoch 3/30


100%|██████████| 74/74 [00:53<00:00,  1.38it/s]


保存新的最佳模型 data/checkpoint/best-0.336.pth
Epoch 4/30


100%|██████████| 74/74 [00:46<00:00,  1.58it/s]


Epoch 5/30


100%|██████████| 74/74 [00:45<00:00,  1.61it/s]


保存新的最佳模型 data/checkpoint/best-0.464.pth
Epoch 6/30


100%|██████████| 74/74 [00:46<00:00,  1.60it/s]


保存新的最佳模型 data/checkpoint/best-0.586.pth
Epoch 7/30


100%|██████████| 74/74 [00:45<00:00,  1.61it/s]


Epoch 8/30


100%|██████████| 74/74 [00:44<00:00,  1.68it/s]


保存新的最佳模型 data/checkpoint/best-0.663.pth
Epoch 9/30


100%|██████████| 74/74 [00:39<00:00,  1.89it/s]


Epoch 10/30


100%|██████████| 74/74 [00:47<00:00,  1.55it/s]


Epoch 11/30


100%|██████████| 74/74 [00:41<00:00,  1.77it/s]


保存新的最佳模型 data/checkpoint/best-0.697.pth
Epoch 12/30


100%|██████████| 74/74 [00:43<00:00,  1.72it/s]


保存新的最佳模型 data/checkpoint/best-0.753.pth
Epoch 13/30


100%|██████████| 74/74 [00:43<00:00,  1.70it/s]


保存新的最佳模型 data/checkpoint/best-0.788.pth
Epoch 14/30


100%|██████████| 74/74 [00:40<00:00,  1.84it/s]


Epoch 15/30


100%|██████████| 74/74 [00:44<00:00,  1.68it/s]


Epoch 16/30


100%|██████████| 74/74 [00:41<00:00,  1.78it/s]


保存新的最佳模型 data/checkpoint/best-0.831.pth
Epoch 17/30


100%|██████████| 74/74 [00:40<00:00,  1.82it/s]


保存新的最佳模型 data/checkpoint/best-0.846.pth
Epoch 18/30


100%|██████████| 74/74 [00:39<00:00,  1.88it/s]


Epoch 19/30


100%|██████████| 74/74 [00:33<00:00,  2.18it/s]


Epoch 20/30


100%|██████████| 74/74 [00:32<00:00,  2.29it/s]


Epoch 21/30


100%|██████████| 74/74 [00:30<00:00,  2.46it/s]


保存新的最佳模型 data/checkpoint/best-0.859.pth
Epoch 22/30


100%|██████████| 74/74 [00:30<00:00,  2.41it/s]


Epoch 23/30


100%|██████████| 74/74 [00:29<00:00,  2.51it/s]


Epoch 24/30


100%|██████████| 74/74 [00:28<00:00,  2.56it/s]


Epoch 25/30


100%|██████████| 74/74 [00:30<00:00,  2.46it/s]


Epoch 26/30


100%|██████████| 74/74 [00:39<00:00,  1.88it/s]


保存新的最佳模型 data/checkpoint/best-0.878.pth
Epoch 27/30


100%|██████████| 74/74 [00:43<00:00,  1.70it/s]


保存新的最佳模型 data/checkpoint/best-0.890.pth
Epoch 28/30


100%|██████████| 74/74 [00:42<00:00,  1.72it/s]


Epoch 29/30


100%|██████████| 74/74 [00:42<00:00,  1.73it/s]


Epoch 30/30


  0%|          | 0/74 [00:00<?, ?it/s]

## 在测试集上评价

In [None]:
# Reload the best checkpoint written during training and make it the current model.
# NOTE(review): the checkpoint is a fully pickled model object, so torch.load
# unpickles it; only load files produced by this notebook. Newer PyTorch
# releases may require an explicit weights_only=False here — confirm against
# the installed version.
model = torch.load(f'data/checkpoint/best-{best_test_accuracy:.3f}.pth')

In [None]:
# Switch the model to evaluation mode, then print the test-set metrics log
model.eval()
print(evaluate_testset())

## 参考文档

https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html

https://www.bilibili.com/video/BV14J411X7Bb

https://www.bilibili.com/video/BV1w4411u7ay