# 基于深度学习的犬类识别研究与实现（源代码）



## 1 导入程序所需要的包

In [None]:
import os, sys
import time
import shutil
import seaborn 
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image
import torch
# 设置 (CPU) 生成随机数的种子，一旦固定种子，后面依次生成的随机数都是固定的
# torch.manual_seed(42)
from torch import nn
from torch.optim import SGD, Adam
from torch.utils.data import DataLoader, RandomSampler
from torch.utils.data.dataset import Dataset
from torchvision.models import resnet
from torchvision import transforms, datasets, models
from torch.optim.lr_scheduler import ReduceLROnPlateau, MultiStepLR
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, classification_report
from d2l import torch as d2l

plt.rcParams['font.sans-serif'] = ['SimHei'] # 正常显示中文
plt.rcParams['axes.unicode_minus'] = False   # 正常显示负号
seaborn.set(font='SimHei')  # 解决Seaborn中文显示问题

#### 查看设备信息和语言版本

In [None]:
# Report the runtime environment: PyTorch version, the properties of CUDA device 0
# (or the string 'CPU' when CUDA is unavailable) and the Python interpreter version.
print('PyTorch:', torch.__version__)
print('设备:', torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU')
print('Python: ', sys.version)

## 2 数据集处理

In [None]:
# The original (unsplit) dataset lives under old_path
old_path = '../input/stanford-dogs-dataset/images/Images'

# The split dataset is stored under data_path
# Local machine:
# data_path = '../input/stanford-dogs-dataset/images/train_val_test'
# Kaggle: ../input/stanford-dogs-dataset-splited-pl/train_val_test
data_path = '../input/stanford-dogs-dataset-splited-pl/train_val_test'

# result_path = './working'
# Kaggle: ./
result_path = './'

# presplit is True when the dataset has already been split, False when it has not
presplit = False


In [None]:
# Look for the training, test and validation folders under data_path;
# presplit becomes True only when all three of them exist.
presplit = all(
    os.path.exists(os.path.join(data_path, subset_folder))
    for subset_folder in ('training_set', 'test_set', 'val_set')
)

print('presplit: ', presplit)
if presplit:
    print('数据已划分为训练集、测试集和验证集')

### 划分训练集、验证集和测试集

1. 统计每一个类别文件夹class_folder(labels)下的文件数量（图像数量image_amount）

2. 根据图像数量生成顺序整数作为图像索引，再用random_split将索引随机划分为训练集、验证集和测试集索引

3. 根据索引将对应图像复制到目的文件夹下

In [None]:
# Copy a single file
def copyfile(old_dir, new_dir, filename):
    """Copy the file at ``old_dir`` to ``new_dir/filename``.

    An existing file with the same name is removed before copying; when no
    such file exists the destination directory is created first if needed.
    """
    target = os.path.join(new_dir, filename)
    if not os.path.exists(target):
        # Nothing to replace: make sure the destination folder is there
        os.makedirs(new_dir, exist_ok = True)
    else:
        # Replace the stale copy
        os.remove(target)
    shutil.copy(old_dir, target)


In [None]:
def reorg_train_val_test(old_path, new_path, class_folders, training_ratio, val_ratio, test_ratio):
    """Randomly split every class folder into training/validation/test sets and copy the images.

    For each folder in ``class_folders`` the image indices are shuffled with
    ``torch.utils.data.random_split`` and the images are copied to
    ``new_path/<training_set|val_set|test_set>/<class folder>/``.

    Args:
        old_path: Directory that holds one sub-folder per class.
        new_path: Destination root for the three subset folders.
        class_folders: Names of the class sub-folders to process.
        training_ratio: Fraction of each class copied to the training set.
        val_ratio: Fraction of each class copied to the validation set.
        test_ratio: Not used in the computation; the test set receives whatever
            is left after the training and validation sizes are rounded down.
    """
    subset_folders = ('training_set', 'val_set', 'test_set')

    for class_folder in class_folders:
        print(f'对{class_folder[10 : ]}进行划分')
        class_dir = os.path.join(old_path, class_folder)
        # List the folder once so that every index refers to the same ordering
        image_names = os.listdir(class_dir)
        image_amount = len(image_names)

        training_size = int(training_ratio * image_amount)
        val_size      = int(val_ratio * image_amount)
        test_size     = image_amount - training_size - val_size
        # Randomly partition the image indices into the three subsets
        index_subsets = torch.utils.data.random_split(list(range(image_amount)),
                                                      [training_size, val_size, test_size])

        for subset_folder, index_subset in zip(subset_folders, index_subsets):
            target_dir = os.path.join(new_path, subset_folder, class_folder)
            for image_index in list(index_subset):
                filename = image_names[image_index]
                copyfile(os.path.join(class_dir, filename), target_dir, filename)
    

In [None]:
"""
划分训练集（0.5）、验证集（0.25）和测试集（0.25）
"""
if not presplit :
    # The data has not been split yet: split it randomly into the three subsets
    print('数据还未划分为训练集、测试集和验证集')

    training_ratio = 0.5
    test_ratio     = 0.25
    val_ratio      = 0.25

    # One folder per class, e.g. 'n02085620-Chihuahua'; dropping the first
    # 10 characters of a folder name leaves the breed name ('Chihuahua').
    class_folders = list(os.listdir(old_path))

    reorg_train_val_test(old_path, data_path, class_folders, training_ratio, val_ratio, test_ratio)

else :
    # The subsets already exist, nothing to do
    print('数据已划分为训练集、测试集和验证集')


### 画出数据分布图

In [None]:
# Count labels for a vertical bar chart
def count_text_v(ax, item_dict):
    """Write each bar's count just above the bar.

    Args:
        ax: Object exposing ``patches`` (the bars), in the same order as the
            values of ``item_dict``.
        item_dict: Mapping whose values are the counts to print.
    """
    # Materialise the values once instead of rebuilding the list for every bar
    counts = list(item_dict.values())
    for patch, count in zip(ax.patches, counts):
        x = patch.get_x() + patch.get_width() / 2   # horizontal centre of the bar
        y = patch.get_y() + patch.get_height()      # top of the bar
        plt.text(x = x, y = y, s = str(count), ha = 'center', va = 'bottom',
                 fontdict = dict(fontsize = 20, color = 'black'))


def plot_vertical_images_per_class(data_path, mode, title, save_name, row_size, col_size, y, color):
    """Draw a vertical bar chart of the number of images per class and save it as a JPEG.

    Args:
        data_path: Root directory of the dataset.
        mode: Sub-folder of ``data_path`` to inspect ('' for ``data_path`` itself).
        title: Chart title.
        save_name: File name (without extension) written under ``./working/``.
        row_size: Figure width passed to ``plt.figure``.
        col_size: Figure height passed to ``plt.figure``.
        y: Tick positions of the count axis; the last one is the axis maximum.
        color: Bar colour.

    Raises:
        ValueError: If no class folder is found under the data folder.
    """
    data_folder = data_path + '/' + mode + '/'
    top_folder  = os.path.normpath(data_folder)

    # Count the files of every folder below the root; the class name is the
    # folder name without its 10-character prefix.
    item_dict = {}
    for root, _, files in os.walk(data_folder):
        if os.path.normpath(root) == top_folder:
            continue
        item_dict[os.path.basename(root)[10 : ]] = len(files)
    if not item_dict:
        raise ValueError('no class folders found under ' + data_folder)

    # Largest classes first
    item_dict    = dict(sorted(item_dict.items(), key = lambda item: item[1], reverse = True))
    class_amount = len(item_dict)
    smallest     = min(item_dict.values())

    plt.figure(figsize = (row_size, col_size))
    ax = plt.bar(list(item_dict.keys()), list(item_dict.values()), width = 0.6, color = color)
    count_text_v(ax, item_dict)
    # Dashed horizontal line (and its label) marking the smallest class
    plt.axhline(y = smallest, ls= "--" , color = "#f4320c", linewidth = 5)
    plt.text(y = smallest, x = -1.3, s = str(smallest), ha = 'right', va = 'center',
             fontdict = dict(fontsize = 30, color = 'black'))
    plt.title(title, fontsize = 50)
    # Ticks and limits follow the actual number of classes rather than a fixed 120
    plt.xticks(ticks = range(class_amount), labels = list(item_dict.keys()), rotation = 90, fontsize = 25)
    plt.yticks(ticks = y, fontsize = 30)
    plt.xlim(xmin = -1, xmax = class_amount)
    plt.ylim(ymin = 0, ymax = y[-1])
    plt.xlabel('Classes', fontsize = 40)
    plt.ylabel('Number of Images', fontsize = 40)
    # savefig does not create missing directories
    os.makedirs("./working/", exist_ok = True)
    plt.savefig("./working/" + save_name + ".jpg", bbox_inches = 'tight')



In [None]:
# Count labels for a horizontal bar chart
def count_text_h(ax, item_dict):
    """Write each bar's count just to the right of the bar.

    Args:
        ax: Object exposing ``patches`` (the bars), in the same order as the
            values of ``item_dict``.
        item_dict: Mapping whose values are the counts to print.
    """
    # Materialise the values once instead of rebuilding the list for every bar
    counts = list(item_dict.values())
    for patch, count in zip(ax.patches, counts):
        x = patch.get_x() + patch.get_width()        # right end of the bar
        y = patch.get_y() + patch.get_height() / 2   # vertical centre of the bar
        plt.text(x = x, y = y, s = str(count), ha = 'left', va = 'center',
                 fontdict = dict(fontsize = 38, color = 'black'))

def plot_honrizontal_images_per_class(data_path, mode, title, save_name, row_size, col_size, y, color, x_min = None):
    """Draw a horizontal bar chart of the number of images per class and save it as a JPEG.

    Args:
        data_path: Root directory of the dataset.
        mode: Sub-folder of ``data_path`` to inspect ('' for ``data_path`` itself).
        title: Chart title.
        save_name: File name (without extension) written under ``./working/``.
        row_size: Figure width passed to ``plt.figure``.
        col_size: Figure height passed to ``plt.figure``.
        y: Tick positions of the count axis; the last one is the axis maximum.
        color: Bar colour.
        x_min: Left limit of the count axis. When omitted, 100 is used as long
            as every class has more than 100 images; otherwise ``y[0]`` is
            used so that smaller subsets are not clipped or drawn on an
            inverted axis.

    Raises:
        ValueError: If no class folder is found under the data folder.
    """
    data_folder = data_path + '/' + mode + '/'
    top_folder  = os.path.normpath(data_folder)

    # Count the files of every folder below the root; the class name is the
    # folder name without its 10-character prefix.
    item_dict = {}
    for root, _, files in os.walk(data_folder):
        if os.path.normpath(root) == top_folder:
            continue
        item_dict[os.path.basename(root)[10 : ]] = len(files)
    if not item_dict:
        raise ValueError('no class folders found under ' + data_folder)

    # Largest classes first
    item_dict    = dict(sorted(item_dict.items(), key = lambda item: item[1], reverse = True))
    class_amount = len(item_dict)
    smallest     = min(item_dict.values())
    if x_min is None:
        x_min = 100 if smallest > 100 else y[0]

    plt.figure(figsize = (row_size, col_size))
    ax = plt.barh(list(item_dict.keys()), list(item_dict.values()), height = 0.8, color = color)
    count_text_h(ax, item_dict)

    # Dashed vertical line (and its label) marking the smallest class
    plt.axvline(x = smallest, ls= "--" , color = "#f4320c", linewidth = 5)
    plt.text(x = smallest, y = -2.07, s = str(smallest), ha = 'center', va = 'bottom',
             fontdict = dict(fontsize = 38, color = 'black'))
    plt.title(title, fontsize = 64)
    # Ticks and limits follow the actual number of classes rather than a fixed 120
    plt.yticks(ticks = range(class_amount), labels = list(item_dict.keys()), fontsize = 40)
    plt.xticks(ticks = y, fontsize = 38)
    plt.xlim(xmin = x_min, xmax = y[-1])
    plt.ylim(ymin = -1, ymax = class_amount)
    plt.ylabel('类别', fontsize = 60)
    plt.xlabel('图像数量', fontsize = 60)
    # savefig does not create missing directories
    os.makedirs("./working/", exist_ok = True)
    plt.savefig("./working/" + save_name + ".jpg", bbox_inches = 'tight')



In [None]:
# Horizontal bar charts

# Tick positions of the count axis for the whole dataset and for each subset
y          = list(range(0, 280, 20))
y_training = list(range(0, 140, 10))
y_val      = list(range(0, 80, 10))
y_test     = list(range(0, 80, 10))

if not presplit :
    # Only the original dataset is available
    plot_honrizontal_images_per_class(
        old_path, mode = '',
        title = 'Distribution of Classes',
        save_name = 'Distribution_h_of_Classes',
        row_size = 26, col_size = 40, y = y, color = '#5684ae')

else :
    # Whole dataset first, then the training, validation and test subsets
    plot_honrizontal_images_per_class(
        old_path, mode = '',
        title = '斯坦福犬类数据集各类别数量分布',
        save_name = 'Distribution_h_of_Classes',
        row_size = 26, col_size = 40, y = y, color = '#5684ae')

    plot_honrizontal_images_per_class(
        os.path.join(data_path, 'training_set'), mode = '',
        title = '训练集各类别数量分布',
        save_name = 'Distribution_h_of_Training_Set_Classes',
        row_size = 26, col_size = 40, y = y_training, color = '#5684ae')

    plot_honrizontal_images_per_class(
        os.path.join(data_path, 'val_set'), mode = '',
        title = '验证集各类别数量分布',
        save_name = 'Distribution_h_of_Val_Set_Classes',
        row_size = 26, col_size = 40, y = y_val, color = '#5684ae')

    plot_honrizontal_images_per_class(
        os.path.join(data_path, 'test_set'), mode = '',
        title = '测试集各类别数量分布',
        save_name = 'Distribution_h_of_Test_Set_Classes',
        row_size = 26, col_size = 40, y = y_test, color = '#5684ae')

# '#5684ae' is the "off blue" colour

In [None]:
# Vertical bar charts
# The file imports the package as `seaborn` (no `sns` alias), so the colour
# table is looked up through that name.
if presplit :
    # Whole dataset first, then the training, validation and test subsets
    plot_vertical_images_per_class(old_path, mode = '', 
                          title     = 'Distribution of Classes', 
                          save_name = 'Distribution_of_Classes', 
                          row_size  = 64, col_size = 42, y = y, color = seaborn.xkcd_rgb['bronze'])

    plot_vertical_images_per_class(os.path.join(data_path, 'training_set'), mode = '', 
                          title     = 'Distribution of Training_Set Classes', 
                          save_name = 'Distribution_of_Training_Set_Classes', 
                          row_size  = 64, col_size = 42, y = y_training, color = seaborn.xkcd_rgb['bronze'])

    plot_vertical_images_per_class(os.path.join(data_path, 'val_set'), mode = '', 
                          title     = 'Distribution of Val_Set Classes', 
                          save_name = 'Distribution_of_Val_Set_Classes', 
                          row_size  = 64, col_size = 42, y = y_val, color = seaborn.xkcd_rgb['bronze'])

    plot_vertical_images_per_class(os.path.join(data_path, 'test_set'), mode = '', 
                          title     = 'Distribution of Test_Set Classes', 
                          save_name = 'Distribution_of_Test_Set_Classes', 
                          row_size  = 64, col_size = 42, y = y_test, color = seaborn.xkcd_rgb['bronze'])

else :
    # Only the original dataset is available
    plot_vertical_images_per_class(old_path, mode = '', 
                          title     = 'Distribution of Classes', 
                          save_name = 'Distribution_of_Classes', 
                          row_size  = 42, col_size = 42, y = y, color = seaborn.xkcd_rgb['bronze'])

### 图像增广

#### 调用 torchvision.datasets.ImageFolder 返回训练数据与标签
**1. torchvision.datasets.ImageFolder 有 root, transform, target_transform, loader四个参数，现在依次介绍这四个参数**

**root**：图片存储的根目录，即各类别文件夹所在目录的上一级目录，例如 './data/train/'。

**transform**：对图片进行预处理的操作（函数），原始图片作为输入，返回一个转换后的图片。

**target_transform**：对图片类别进行预处理的操作，输入为 target，输出为转换后的 target。如果不传该参数，则对 target 不做任何转换，返回的是顺序索引 0, 1, 2…

**loader**：表示数据集加载方式，通常默认加载方式即可。

**2. 该 API 有以下成员变量:**

**self.classes**：用一个 list 保存类别名称；

**self.class_to_idx**：类别对应的索引，与不做任何转换返回的 target 对应；

**self.imgs**：保存(img-path, class) tuple的 list；

原文链接：https://blog.csdn.net/qq_33254870/article/details/103362621

#### torch.utils.data.DataLoader

https://blog.csdn.net/qq_36044523/article/details/118914223

https://blog.csdn.net/qq_40788447/article/details/114937779

#### 标准化（归一化）

验证集的标准化和训练集一样

测试集标准化的均值和标准差应该来源于训练集得到的均值和标准偏差

在训练之前把数据划分成训练集和测试集，接着对训练集进行标准化，同时保存标准化时计算出来的参数，例如最大值最小值或者是方差之类的，具体由你使用的标准化方法而定。最后再用这些参数来标准化你的测试集，以及之后所有新的测试样本。

由于该数据集源于ImageNet，因此可以直接用ImageNet的mean和std

1) RandomRotation(RR)

2) RandomResizedCrop(RRC)

3) RandomHorizontalFlip(RHF)

4) ColorJitter(CJ)

5) RandomRotation + RandomHorizontalFlip(RR-RHF)

6) RandomRotation + RandomResizedCrop + RandomHorizontalFlip(RR-RRC-RHF)

7) RandomRotation + ColorJitter + RandomHorizontalFlip(RR-CJ-RHF)


In [None]:
'''
图像转换为224 X 224
'''
def load_transform_images(images_path, batch_size, threads, mean, std):
    """Build the datasets and data loaders for the three subsets.

    Images are read with ``datasets.ImageFolder`` from ``training_set``,
    ``val_set`` and ``test_set`` under ``images_path`` and resized to
    224 x 224. Only the training set is augmented; all three are normalised
    with ``mean`` and ``std``.

    Returns:
        ``(training_set_loader, test_set_loader, val_set_loader, dataset,
        training_set, test_set, val_set)`` where ``dataset`` is the training set.
    """
    def make_normalize():
        # Normalisation step shared by all pipelines
        return transforms.Normalize(torch.Tensor(mean), torch.Tensor(std))

    def make_eval_transform():
        # Validation and test images are not augmented
        return transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            make_normalize(),
        ])

    def make_loader(subset, shuffle):
        # The loader splits the subset into mini-batches
        return DataLoader(subset, batch_size = batch_size, num_workers = threads, shuffle = shuffle)

    # Augmentation applied to the training set only
    training_transform = transforms.Compose([
        # Random rotation
        transforms.RandomRotation(degrees = 20),
        # Jitter brightness and contrast; saturation and hue are left at 0
        transforms.ColorJitter(brightness = 0.4, contrast = 0.4, saturation = 0, hue = 0),
        transforms.Resize((224, 224)),
        # Horizontal flip with probability 0.5
        transforms.RandomHorizontalFlip(p = 0.5),
        transforms.ToTensor(),
        make_normalize(),
    ])
    test_transform = make_eval_transform()
    val_transform  = make_eval_transform()

    training_set = datasets.ImageFolder(root = images_path + '/training_set', transform = training_transform)
    val_set      = datasets.ImageFolder(root = images_path + '/val_set',      transform = val_transform)
    test_set     = datasets.ImageFolder(root = images_path + '/test_set',     transform = test_transform)

    # Only the training loader shuffles
    training_set_loader = make_loader(training_set, True)
    val_set_loader      = make_loader(val_set, False)
    test_set_loader     = make_loader(test_set, False)

    return training_set_loader, test_set_loader, val_set_loader, training_set, training_set, test_set, val_set


#### mean和std的取法
为什么用mean = [0.485, 0.456, 0.406]，std = [0.229, 0.224, 0.225] ？

因为这一组数据是从imagenet训练集中抽样算出来的。

In that example, they are using the mean and stddev of ImageNet, but if you look at their MNIST examples, the mean and stddev are 1-dimensional (since the inputs are greyscale-- no RGB channels).

Whether or not to use ImageNet's mean and stddev depends on your data. **Assuming your data are ordinary photos of "natural scenes"(people, buildings, animals, varied lighting/angles/backgrounds, etc.), and assuming your dataset is biased in the same way ImageNet is (in terms of class balance), then it's ok to normalize with ImageNet's scene statistics.** If the photos are "special" somehow (color filtered, contrast adjusted, uncommon lighting, etc.) or an "un-natural subject" (medical images, satellite imagery, hand drawings, etc.) then I would recommend correctly normalizing your dataset before model training!

In [None]:
# batch_size is 128 or 256
batch_size = 128
threads    = 0
# Mean and standard deviation used for normalisation
mean = [0.485, 0.456, 0.406]
std  = [0.229, 0.224, 0.225]

(training_set_loader, test_set_loader, val_set_loader,
 dataset, training_set, test_set, val_set) = load_transform_images(data_path, batch_size, threads, mean, std)

# Class names are the folder names without their 10-character prefix
class_names = [folder_name[10:] for folder_name in dataset.classes]


### 显示图像预处理效果

In [None]:
# Print the number of classes and the size of each subset
print("总共有", len(class_names), "种狗")
print("训练集大小：", len(training_set))
print("测试集大小：", len(test_set))
print("验证集大小：", len(val_set))
# print(class_names)
# print(training_set[0].size())
# print(training_set.shape)
# Second sample of the dataset: the item itself, its imgs entry, and the path stored in that entry
print(dataset[1])
print(dataset.imgs[1])
print(dataset.imgs[1][0])

# Open the file behind the second sample and show it
img = Image.open(dataset.imgs[1][0], "r")
#img.show()
plt.figure("sample dog")
plt.axis('off')  # hide the axes
plt.imshow(img)
plt.show()

In [None]:
def plot_grid_images(training_set, batch_size, class_names, mean, std, rows, columns, size, title_size):
    """Save randomly sampled training images, one PNG file per image.

    One batch of ``batch_size`` samples is drawn at random (with replacement)
    and up to ``rows * columns`` of them are saved to
    ``result_path/sample_image<i>.png``, each titled with its class name.

    Args:
        training_set: Dataset yielding ``(image, label)`` pairs.
        batch_size: Number of samples drawn.
        class_names: Class names indexed by label.
        mean: Not used.
        std: Not used.
        rows: With ``columns``, determines how many images are saved.
        columns: See ``rows``.
        size: Width and height of the figure.
        title_size: Half of the title font size.
    """
    # NOTE(review): images are shown exactly as the dataset returns them; if the
    # dataset normalises its images, `std * img + mean` would undo that before
    # display -- confirm which rendering is wanted.
    sampler = RandomSampler(training_set, num_samples = batch_size, replacement = True)
    train_loader = DataLoader(training_set, sampler = sampler, shuffle = False, batch_size = batch_size, num_workers = 0)

    # The built-in next() is the supported way to advance a loader iterator
    images, labels = next(iter(train_loader))
    label_ids = labels.tolist()

    plt.figure(figsize = (size, size))

    # Never index past the end of the sampled batch
    image_amount = min(rows * columns, len(label_ids))

    # One image per file. (A rows x columns grid can be drawn instead by
    # selecting plt.subplot(rows, columns, i + 1) for every image and saving once.)
    for i in range(image_amount):
        # Start from an empty figure so earlier images do not pile up on the axes
        plt.clf()
        plt.title(class_names[label_ids[i]], fontsize = title_size * 2)
        # Move the first axis to the end (e.g. channels-first -> channels-last) for imshow
        img = images[i].permute(1, 2, 0)
        plt.axis('off')
        plt.imshow(img, interpolation = 'none')
        plt.savefig(os.path.join(result_path, 'sample_image' + str(i) + '.png'), bbox_inches = 'tight')
        # plt.tight_layout()


In [None]:
# Save 3 x 3 = 9 randomly sampled training images
plot_grid_images(training_set, batch_size, class_names, mean, std, rows = 3, columns = 3, size = 40, title_size = 60)


## 3 获取网络模型

In [None]:
def load_network(finetune_net, net_name, dropout_ratio, class_names, unfrozen_layers):
    """Prepare a network for fine-tuning and replace its output layer.

    Top-level children whose name is in ``unfrozen_layers`` are made trainable
    and all others are frozen. The ``fc`` layer is then replaced by
    ``Linear(in_features, 256) -> ReLU -> Dropout -> Linear(256, n_classes)``,
    whose two linear layers get Kaiming-uniform weights.

    Args:
        finetune_net: Model with an ``fc`` attribute exposing ``in_features``.
        net_name: Not used.
        dropout_ratio: Dropout probability of the new head.
        class_names: Its length is the number of output classes.
        unfrozen_layers: Names of the top-level children to keep trainable.

    Returns:
        The modified network (the same object that was passed in).
    """
    for name, child in finetune_net.named_children():
        trainable = name in unfrozen_layers
        print(name + (' is unfrozen' if trainable else ' is frozen'))
        for param in child.parameters():
            param.requires_grad = trainable

    # Replace the output layer with a small Sequential head
    num_inftrs = finetune_net.fc.in_features
    finetune_net.fc = nn.Sequential(nn.Linear(num_inftrs, 256),
                                    nn.ReLU(),
                                    nn.Dropout(p = dropout_ratio),
                                    nn.Linear(256, len(class_names)))
    # Kaiming (He) initialisation of both linear layers of the head
    nn.init.kaiming_uniform_(tensor = finetune_net.fc[0].weight, a = 0, mode = 'fan_in')
    nn.init.kaiming_uniform_(tensor = finetune_net.fc[3].weight, a = 0, mode = 'fan_in')

    # `display` is injected by IPython; fall back to print when it is absent
    try:
        display(finetune_net)
    except NameError:
        print(finetune_net)

    total_params = sum(param.numel() for param in finetune_net.parameters())
    print(f'{total_params:,} total parameters')

    total_trainable_params = sum(param.numel() for param in finetune_net.parameters() if param.requires_grad)
    print(f'{total_trainable_params:,} training parameters')

    return finetune_net


In [None]:
# test6: fine-tune a pretrained model
finetune_net = resnet.resnet50(pretrained = True)
net_name = 'resnet50'
# Fine-tune layer3 and layer4 and train the fc (output) layer; every other layer is frozen.
# Candidate names: 'layer1', 'layer2', 'layer3', 'layer4', 'fc'
unfrozen_layers = ['layer3', 'layer4', 'fc'] 
# unfrozen_layers = ['fc'] 

# Dropout ratio; values tried: 0.2, 0.3, 0.6, 0.7, 0.8
dropout_ratio = 0.3

finetune_net = load_network(finetune_net, net_name, dropout_ratio, class_names, unfrozen_layers)

print(f'training set: {len(training_set)}\nvalidation set: {len(val_set)}\ntest set: {len(test_set)}')


## 4 模型训练

In [None]:
# The parameters of the output head are trained with ten times the base learning rate
def training_model(result_path, model_name, model, training_loader, val_loader, learning_rate, epochs, momentum, weight_decay, patience, n_epochs_stop, milestones):
    """Train ``model`` with Adam, ReduceLROnPlateau and early stopping.

    Every epoch runs one pass over ``training_loader`` and one over
    ``val_loader``. The weights with the lowest validation loss are saved to
    ``result_path/<model_name>.pth``; after an epoch without improvement those
    weights are loaded back into the model.

    Args:
        result_path: Directory of the checkpoint file.
        model_name: Checkpoint file name without extension.
        model: Network with an ``fc`` sub-module (the output head).
        training_loader: Loader of ``(inputs, targets)`` training batches.
        val_loader: Loader of ``(inputs, targets)`` validation batches.
        learning_rate: Base learning rate; the head uses ``learning_rate * 10``.
        epochs: Maximum number of epochs.
        momentum: Not used (Adam is the optimiser).
        weight_decay: Weight decay passed to Adam.
        patience: ``patience`` of ReduceLROnPlateau.
        n_epochs_stop: Stop after this many consecutive epochs without a
            lower validation loss.
        milestones: Not used (kept for the MultiStepLR alternative).

    Returns:
        ``(model, (losses, accuracies), (losses2, accuracies2), y_test, preds)``.
        ``losses``/``accuracies`` hold one value per epoch and
        ``losses2``/``accuracies2`` the running average after every batch,
        each as a dict with the keys 'training' and 'val'. ``y_test`` and
        ``preds`` are the validation targets and predictions accumulated over
        all epochs.
    """
    criterion = nn.CrossEntropyLoss()
    checkpoint_path = os.path.join(result_path, str(model_name) + '.pth')

    # Two parameter groups: the head (model.fc) gets 10x the base learning rate.
    # Grouping by identity keeps the groups disjoint whatever the head's
    # parameters are called.
    head_params     = list(model.fc.parameters())
    head_ids        = {id(param) for param in head_params}
    backbone_params = [param for param in model.parameters() if id(param) not in head_ids]
    optimizer = Adam([{'params': backbone_params},
                      {'params': head_params, 'lr': learning_rate * 10}],
                     lr = learning_rate, weight_decay = weight_decay)

    # Lower the learning rate when the validation loss stops improving.
    # The `verbose` flag is deprecated in recent PyTorch releases, so learning
    # rate changes are reported manually below.
    scheduler = ReduceLROnPlateau(optimizer, patience = patience, factor = 0.2)

    loaders     = {'training': training_loader, 'val': val_loader}
    losses      = {'training': [], 'val': []}
    accuracies  = {'training': [], 'val': []}
    losses2     = {'training': [], 'val': []}
    accuracies2 = {'training': [], 'val': []}

    y_test = []
    preds  = []

    # float('inf') instead of np.Inf, which NumPy 2.0 removed
    min_val_loss     = float('inf')
    epochs_no_improv = 0

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
            print(f'Using {torch.cuda.device_count()} GPUs')
        else :
            print(f'Using {torch.cuda.device_count()} GPU')
        model.cuda()
    else:
        print('Using CPU')

    start = time.time()
    # One epoch = a training pass followed by a validation pass
    for epoch in range(epochs):
        for mode in ['training', 'val']:
            is_training = mode == 'training'
            model.train(is_training)

            epoch_loss = 0
            epoch_acc  = 0
            samples    = 0

            iterations = len(loaders[mode])
            # Report about five times per pass; at least 1 so that loaders with
            # fewer than five batches do not cause a division by zero
            log_interval = max(1, iterations // 5)

            for i, (inputs, targets) in enumerate(loaders[mode]):
                if use_cuda:
                    inputs  = inputs.cuda()
                    targets = targets.cuda()
                optimizer.zero_grad()
                # Gradients are only tracked while training
                with torch.set_grad_enabled(is_training):
                    output = model(inputs)
                    # Mean loss of the batch
                    loss = criterion(output, targets)
                predicted = output.max(1)[1]

                if is_training:
                    loss.backward()
                    optimizer.step()
                else:
                    y_test.extend(targets.tolist())
                    preds.extend(predicted.tolist())

                batch_samples = inputs.shape[0]
                acc = (predicted == targets).sum().item() / batch_samples
                # Running totals, weighted by the batch size
                epoch_loss += loss.item() * batch_samples
                epoch_acc  += acc * batch_samples
                samples    += batch_samples
                losses2[mode].append(epoch_loss / samples)
                accuracies2[mode].append(epoch_acc / samples)

                if i % log_interval == 0:
                    print(f'[{mode}] Epoch {epoch + 1} / {epochs} Iteration {i + 1} / {iterations} Loss: {epoch_loss/samples:0.4f} Accuracy: {epoch_acc/samples:0.4f}  Already trained {time.time() - start :0.2f} s')

            # Average loss and accuracy over the whole pass
            epoch_loss /= samples
            epoch_acc  /= samples
            losses[mode].append(epoch_loss)
            accuracies[mode].append(epoch_acc)

            print(f'[{mode}] Epoch {epoch + 1} / {epochs} Iteration {i + 1} / {iterations} Loss: {epoch_loss:0.4f} Accuracy: {epoch_acc:0.4f}  Already trained {time.time() - start :0.2f} s')

            if not is_training:
                lrs_before = [group['lr'] for group in optimizer.param_groups]
                scheduler.step(epoch_loss)
                lrs_after = [group['lr'] for group in optimizer.param_groups]
                if lrs_after != lrs_before:
                    print(f'Epoch {epoch + 1}: reducing learning rates to {lrs_after}')

        # The validation pass runs last, so epoch_loss is the validation loss here
        if epoch_loss < min_val_loss:
            torch.save(model.state_dict(), checkpoint_path)
            epochs_no_improv = 0
            min_val_loss = epoch_loss
        else:
            epochs_no_improv += 1
            print(f'Epochs with no improvement {epochs_no_improv}')
            # Go back to the best weights, also when stopping early, so the
            # returned model is always the best checkpoint
            model.load_state_dict(torch.load(checkpoint_path))
            if epochs_no_improv == n_epochs_stop:
                print('Early stopping!')
                return model, (losses, accuracies), (losses2, accuracies2), y_test, preds

    print(f'The whole process took {time.time() - start :0.2f} s.')
    return model, (losses, accuracies), (losses2, accuracies2), y_test, preds


In [None]:
# The pretrained parameters come from ImageNet and are already good, so a small learning rate
# is usually enough to fine-tune them (e.g. layer1-4: 0.0001, fc: 0.001).
# Pairs tried (base + fc): 0.001+0.01, 0.0001+0.001, 0.00001+0.0001, 0.000001+0.00001
learning_rate = 0.00001
epochs        = 100
# 0.9
momentum      = 0.9
# test6: weight decay, values tried: 0.0005, 0.001
weight_decay  = 0.0005
# Number of epochs without improvement that is tolerated before the learning rate is reduced
patience      = 3

milestones = [5, 9, 15, 25]

# Early stopping: end training after n epochs without improvement
n_epochs_stop = 6
# results_path  = './'

finetune_net, loss_acc, loss_acc2, y_test, preds = training_model(result_path, net_name, finetune_net, 
                                                       training_set_loader, val_set_loader, 
                                                       learning_rate, epochs, momentum, 
                                                       weight_decay, patience, n_epochs_stop, 
                                                       milestones)


### 显示训练过程

In [None]:
# Training curves, one point per epoch
def plot_logs_classification(result_path, model_name, logs, epochs, max_loss, min_acc):
    """Plot the per-epoch loss and accuracy curves side by side and save the figure.

    Args:
        result_path: Directory the PNG file is written to.
        model_name: Not used; the output file name is fixed.
        logs: ``(losses, accuracies)``, each a dict with the keys 'training'
            and 'val' holding one value per epoch.
        epochs: Right limit of the epoch axis.
        max_loss: Upper limit of the loss axis.
        min_acc: Lower limit of the accuracy axis.
    """
    # (subplot position, curves, name, y-axis limits, legend labels, legend location)
    panels = (
        (121, logs[0], 'Loss', (0, max_loss),
         ['Training Loss', 'Validation Loss'], 'best'),
        (122, logs[1], 'Accuracy', (min_acc, 1),
         ['Training Accuracy', 'Validation Accuracy'], 'lower right'),
    )

    plt.figure(figsize = (36, 12))
    for position, curves, name, (y_low, y_high), legend_labels, legend_loc in panels:
        plt.subplot(position)
        plt.plot(curves['training'], color = 'royalblue', linestyle = 'solid')
        plt.plot(curves['val'], color = 'darkorange', linestyle = 'dashed')
        plt.title(name, fontsize = 30)
        plt.yticks(fontsize = 25)
        plt.xticks(fontsize = 25)
        plt.xlim(xmin = 0, xmax = epochs)
        plt.ylim(ymin = y_low, ymax = y_high)
        plt.legend(legend_labels, fontsize = 25, loc = legend_loc)
        plt.xlabel('Epoch', fontsize = 25)
        plt.ylabel(name, fontsize = 25)
        plt.grid()

    # os.path.join inserts the separator that plain concatenation drops when
    # result_path has no trailing slash (e.g. './working')
    plt.savefig(os.path.join(result_path, 'test6-2_RR-CJb4c4-RHF-v2_lr0.00001wd0.0005dr0.3.png'), bbox_inches = 'tight')


In [None]:
# Axis limits for the per-epoch curves: x in [0, epochs], loss in [0, max_loss],
# accuracy in [min_acc, 1].
epochs, max_loss, min_acc = 40, 5, 0

plot_logs_classification(
    result_path = result_path,
    model_name = net_name,
    logs = loss_acc,
    epochs = epochs,
    max_loss = max_loss,
    min_acc = min_acc,
)


In [None]:
# x-axis in units of iterations
def plot_logs_classification2(result_path, model_name, logs, training_iter, val_iter, max_loss, min_acc):
    """Plot the four training/validation curves on a 2x2 grid and save it.

    Panel layout: training loss (top left), validation loss (top right),
    training accuracy (bottom left), validation accuracy (bottom right).

    Args:
        result_path: Directory the PNG file is written into.
        model_name: Unused; the output file name is fixed. Kept so existing
            callers keep working.
        logs: Pair ``(losses, accuracies)``; each element is indexed with the
            keys ``'training'`` and ``'val'`` and yields a plottable sequence.
        training_iter: Upper limit of the x-axis on the two training panels.
        val_iter: Upper limit of the x-axis on the two validation panels.
        max_loss: Upper limit of the y-axis on the loss panels.
        min_acc: Lower limit of the y-axis on the accuracy panels.
    """
    losses, accuracies = logs[0], logs[1]
    loss_range = (0, max_loss)
    acc_range = (min_acc, 1)

    # (subplot position, curve, x-axis upper limit, y-axis limits, y label)
    panels = (
        (221, losses['training'], training_iter, loss_range, 'Training Loss'),
        (222, losses['val'], val_iter, loss_range, 'Validation Loss'),
        (223, accuracies['training'], training_iter, acc_range, 'Training Accuracy'),
        (224, accuracies['val'], val_iter, acc_range, 'Validation Accuracy'),
    )

    plt.figure(figsize = (84, 64))
    for position, curve, x_high, (y_low, y_high), y_label in panels:
        plt.subplot(position)
        plt.plot(curve, color = 'royalblue', linestyle = 'solid')
        plt.yticks(fontsize = 50)
        plt.xticks(fontsize = 50)
        # Fix the axis ranges so that different runs are directly comparable.
        plt.xlim(xmin = 0, xmax = x_high)
        plt.ylim(ymin = y_low, ymax = y_high)
        plt.xlabel('Iteration', fontsize = 50)
        plt.ylabel(y_label, fontsize = 50)
        plt.grid()

    # os.path.join (instead of string concatenation) keeps the path valid
    # when result_path has no trailing separator.
    plt.savefig(os.path.join(result_path, 'test6-4_RR-CJb4c4-RHF-v2_lr0.00001wd0.0005dr0.3.png'))


In [None]:
# Axis limits for the per-iteration curves.
# NOTE(review): 81 and 40 are presumably the number of batches per epoch of
# the training and validation loaders -- confirm against the loaders.
max_loss, min_acc = 5, 0
training_iter, val_iter = epochs * 81, epochs * 40

plot_logs_classification2(
    result_path = result_path,
    model_name = net_name,
    logs = loss_acc2,
    training_iter = training_iter,
    val_iter = val_iter,
    max_loss = max_loss,
    min_acc = min_acc,
)


## 5 模型测试

In [None]:
def test_model(result_path, model_name, model, test_loader):
    """Load the saved checkpoint and run the model over ``test_loader``.

    The weights stored in ``<result_path>/<model_name>.pth`` are loaded into
    ``model``, the model is moved to the GPU when one is available and put in
    evaluation mode, and every batch of ``test_loader`` is passed through it
    with gradient tracking disabled.

    Args:
        result_path: Directory that contains the checkpoint file.
        model_name: Checkpoint file name without the ``.pth`` extension.
        model: Module whose ``state_dict`` matches the checkpoint. It is
            modified in place (weights, device, eval mode).
        test_loader: Sized iterable that yields ``(inputs, targets)`` batches
            of tensors.

    Returns:
        Tuple ``(preds, trues)`` of NumPy arrays: the model outputs and the
        targets, each concatenated over all batches along the first axis.

    Raises:
        ValueError: If ``test_loader`` yields no batches (raised by
            ``np.concatenate`` on an empty list).
    """
    checkpoint_path = os.path.join(result_path, str(model_name) + '.pth')
    # map_location='cpu' lets a checkpoint written on a GPU machine be loaded
    # on a CPU-only one; load_state_dict copies the values onto whatever
    # device the model's parameters live on.
    model.load_state_dict(torch.load(checkpoint_path, map_location = 'cpu'))

    use_cuda = torch.cuda.is_available()
    if use_cuda:
        model.cuda()
    model.eval()

    preds = []
    trues = []

    n_batches = len(test_loader)
    # Report progress about five times; never let the interval drop to 0,
    # which would raise ZeroDivisionError for loaders with < 5 batches.
    report_every = max(1, n_batches // 5)

    # Inference only: do not build autograd graphs.
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(test_loader):
            if use_cuda:
                inputs = inputs.cuda()
            pred = model(inputs).detach().cpu().numpy().copy()
            true = targets.numpy().copy()
            preds.append(pred)
            trues.append(true)

            if i % report_every == 0:
                print(f'Iteration {i + 1} / {n_batches}')
    return np.concatenate(preds), np.concatenate(trues)


# The name starts with "test_", so tell pytest not to collect this helper as
# a test case if the module is ever imported by a test run.
test_model.__test__ = False


In [None]:
# Run the saved checkpoint over the test loader.
preds_test, y_true = test_model(result_path, net_name, finetune_net, test_set_loader)

# Predicted class = index of the largest output along axis 1.
test_pred_labels = preds_test.argmax(1)

acc = accuracy_score(y_true, test_pred_labels)
score = f1_score(y_true, test_pred_labels, average = 'micro')
print(f'Accuracy: {acc: 0.4f}\nMicro F1-score: {score: 0.4f}')


In [None]:
# Text report of the per-class metrics on the test predictions.
print(
    classification_report(
        y_true,
        preds_test.argmax(1),
        target_names = class_names,
    )
)


In [None]:
def display_confusion_matrix(result_path, model_name, y_true, preds, class_names, annot, figsize, fontsize) :
    """Draw the test-set confusion matrix as a heat map and save it as a PNG.

    Rows are true labels and columns are predicted labels. Classes are shown
    by their 1-based position rather than by name.

    Args:
        result_path: Directory the PNG file is written into.
        model_name: Unused; the output file name is fixed. Kept so existing
            callers keep working.
        y_true: Ground-truth labels.
        preds: Array of model outputs; the predicted label of each sample is
            ``preds.argmax(1)``.
        class_names: Unused; the axes are labelled with numeric indices. Kept
            so existing callers keep working.
        annot: Passed to ``seaborn.heatmap`` to toggle the per-cell counts.
        figsize: Figure size passed to ``plt.figure``.
        fontsize: Font size of the axis labels and of the title.
    """
    cm = confusion_matrix(y_true, preds.argmax(1))

    # Derive the 1-based tick labels from the matrix itself instead of
    # hard-coding 120 classes, so the DataFrame always matches cm's shape.
    class_index = list(range(1, cm.shape[0] + 1))
    df_cm = pd.DataFrame(cm, index = class_index, columns = class_index)
    # Kept from the original: changes NumPy's global print precision.
    np.set_printoptions(precision = 2)

    title_str = 'Confusion Matrix for Test Data'

    plt.figure(figsize = figsize)
    seaborn.set(font_scale = 1.2)
    seaborn.heatmap(df_cm, annot = annot, annot_kws = {'size': 18, 'weight':'bold', 'color': 'white'}, fmt = 'd')
    plt.yticks(fontsize = 18)
    plt.xticks(fontsize = 18)
    plt.ylabel('True Label', fontsize = fontsize)
    plt.xlabel('Predicted Label', fontsize = fontsize)
    plt.title(title_str, fontsize = fontsize)

    plt.savefig(os.path.join(result_path, 'test6_RR-CJb4c4-RHF-v2_lr0.00001wd0.0005dr0.3_conf_mat.png'), bbox_inches = 'tight')
    

In [None]:
# Visualise the confusion matrix as a heat map
display_confusion_matrix(
    result_path = result_path,
    model_name = net_name,
    y_true = y_true,
    preds = preds_test,
    class_names = class_names,
    annot = True,
    figsize = (40, 36),
    fontsize = 42,
)
