# 3.1 预训练模型原理
* <u>若未特别说明，每个代码单元格均可独立运行</u>

## 3.1.1 状态字典与保存整个模型

* 测试权值相等输出相等

In [None]:
import torch

#   Load a pretrained ResNet-18 from torch.hub
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)

#   Random input batch: one 3-channel 224x224 image
x = torch.rand(1, 3, 224, 224)

#   Put the model in evaluation mode first so that dropout and batch-norm
#   layers behave deterministically
model.eval()

#   Feed the random tensor through the model and show the first 5 outputs
#   torch.no_grad() turns off gradient tracking
#   reshape(-1) flattens the output tensor
with torch.no_grad():
    print(model(x).reshape(-1)[:5])

#   Save the whole model object (not just its state dict)
torch.save(model, 'model.pth')

#   Load the whole model object back
#   NOTE(review): recent PyTorch releases reportedly default torch.load to
#   weights_only=True, which rejects a fully pickled module -- confirm
#   against the installed version
new_model = torch.load('model.pth')

#   Alternative: build a fresh model and load only the parameters into it.
#   load_state_dict has a `strict` argument (default True) that controls
#   whether the keys must match exactly; a mismatch raises an error.
#   new_model.load_state_dict(state_dict.state_dict())

#   Evaluation mode again, so both models produce the same output
new_model.eval()

with torch.no_grad():
    print(new_model(x).reshape(-1)[:5])

## 3.1.2 通过设备保存与只保存状态字典

* 对比状态字典所在设备

In [None]:
import torch

#   torch.load puts the tensors of a state dict back on the device they were
#   saved from.
#   load_state_dict copies the weights onto whatever device the receiving
#   model currently lives on.
SAVE_PATH = 'model.pth'
device = torch.device('cpu')

net = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True)
net.to(device)

#   Report the device of one weight tensor before saving
sd = net.state_dict()
print('Save state dict device = %s' % (sd['conv1.weight'].device))

#   Save only the state dict, not the model object
torch.save(net.state_dict(), SAVE_PATH)

#   map_location can be passed to choose the target device; by default the
#   device used at save time is kept
#   sd = torch.load(SAVE_PATH, map_location='cpu')
sd = torch.load(SAVE_PATH)
print('Load state dict device = %s' % (sd['conv1.weight'].device))

---

# 3.2 加载imagenet预训练模型

In [None]:
#   The torchvision.models package ships many model architectures
import torchvision.models as models

#   pretrained defaults to False, so each of these constructors is called
#   without pretrained weights
resnet18 = models.resnet18()
alexnet = models.alexnet()
vgg16 = models.vgg16()
squeezenet = models.squeezenet1_0()
densenet = models.densenet161()
inception = models.inception_v3()
googlenet = models.googlenet()
shufflenet = models.shufflenet_v2_x1_0()
mobilenet = models.mobilenet_v2()
resnext50_32x4d = models.resnext50_32x4d()
wide_resnet50_2 = models.wide_resnet50_2()
mnasnet = models.mnasnet1_0()

In [None]:
import torchvision.models as models

#   Request the pretrained weights this time and print the layer structure
resnet18 = models.resnet18(pretrained=True)
print(resnet18)

---

# 3.3 准备数据

* <u>以下3.3、3.4、3.5代码连续进行</u>

* 下载数据集hymenoptera，结构如下
* 下载链接：https://www.kaggle.com/datasets/thedatasith/hymenoptera?resource=download
```
hymenoptera_data
|--train
    |--ants     # 124张
    |--bees     # 121张
|--val
    |--ants     # 70张
    |--bees     # 83张
```
* 6个文件夹，398个文件

## 3.3.1 加载数据集

* 首先使用torchvision.datasets.ImageFolder类快速封装数据集

* 以下是这个类的声明
```python
torchvision.datasets.ImageFolder(root, 
                                transform = None, 
                                target_transform = None, 
                                loader = <function default_loader> , 
                                is_valid_file=None)
```
* 数据集中图片大小不一致，因此要对图片进行裁剪，归一化提升效果

In [None]:
import torchvision

#   Training set: augmentation + normalization
#   Validation set: normalization only
data_transforms = {
    'train': torchvision.transforms.Compose([
        #   Randomly crop a region, then resize it to 224
        torchvision.transforms.RandomResizedCrop(224),
        #   Random horizontal flip
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ToTensor(),
        #   Per-channel mean / std (presumably the ImageNet statistics the
        #   pretrained weights expect -- confirm)
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': torchvision.transforms.Compose([
        #   Deterministic preprocessing: resize, then take the central 224 crop
        torchvision.transforms.Resize(256),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}
#   NOTE(review): the directory tree documented above is rooted at
#   `hymenoptera_data`; confirm that this path matches the extracted folder
data_dir = 'hymenoptera'
#   One ImageFolder dataset per split, each with its own transform pipeline
image_datasets = {x: torchvision.datasets.ImageFolder(root=data_dir + '/' + x, transform=data_transforms[x]) for x in
                  ['train', 'val']}
print(image_datasets['train'].classes)

* 生成dataloader字典

In [None]:
import torch

#   Build one shuffled loader per split (batches of 4) and collect them in a
#   dict keyed by phase name
train_loader = torch.utils.data.DataLoader(image_datasets['train'], batch_size=4, shuffle=True)
val_loader = torch.utils.data.DataLoader(image_datasets['val'], batch_size=4, shuffle=True)
dataloader = {'train': train_loader, 'val': val_loader}

## 3.3.2 使用matplotlib可视化数据

* transforms中图片维度[H,W,C]值[0,255]，转换成张量维度[C,H,W]值[0,1]，且经过了归一化
* matplotlib需要各维度含义是[H,W,C]，且不需要归一化
* 所以需要反向归一化操作一遍

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch

def imshow(inp, title=None):
    """Show a normalized image tensor with matplotlib.

    The tensor is converted to a numpy array, its axes are reordered from
    (0, 1, 2) to (1, 2, 0), the normalization is undone with the fixed
    per-channel mean / std below, and the result is clamped to [0, 1]
    before plotting.

    Args:
        inp: tensor exposing ``.numpy()``; expected to be channel-first.
        title: optional plot title; skipped when ``None``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Move the channel axis last, which is the layout plt.imshow expects.
    channels_last = inp.numpy().transpose((1, 2, 0))
    # Undo the normalization and clamp to the displayable range in one go.
    restored = np.clip(channel_std * channels_last + channel_mean, 0, 1)

    plt.imshow(restored)
    if title is not None:
        plt.title(title)
    # Short pause so the figure gets a chance to refresh.
    plt.pause(0.001)

#   Class names, indexed by label id when building plot titles
class_type = ['ants', 'bees']

In [None]:
#   Fetch one batch from the training loader
inputs, classes = next(iter(train_loader))

#   make_grid tiles the batch into one image and adds a border around each
#   tile, so the resulting resolution differs from the inputs
out = torchvision.utils.make_grid(inputs)

#   Title lists the class name of every image in the batch
imshow(out, title=[class_type[x] for x in classes])

# 3.4开始训练

* 将训练写成一个函数，参数是网络模型、损失函数、优化器、学习率调度器、训练轮数和设备，返回验证集上准确率最高的模型对象及其准确率
```python
def train_model(model, criterion, optimizer, scheduler, epoch_num, device):
    Args:
        model       网络模型
        criterion   损失函数
        optimizer   优化器
        scheduler   学习率调度器
        epoch_num   训练轮数
        device      训练所用设备
    Returns:
        best_model  验证集准确率最高的模型
        best_acc    对应的最高准确率
```

In [None]:
import copy
from tqdm import tqdm

def train_model(model, criterion, optimizer, scheduler, epoch_num, device):
    """Train ``model`` and keep the copy that scores best on validation.

    Each epoch runs a training pass and then a validation pass over the
    module-level ``dataloader`` dict; ``image_datasets`` supplies the
    per-phase sample counts used to average loss and accuracy.

    Args:
        model: network to optimize; its weights are updated in place.
        criterion: loss function, called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        epoch_num: number of epochs to run.
        device: device each batch is moved to before the forward pass.

    Returns:
        tuple: ``(best_model, best_acc)``. ``best_model`` is a deep copy of
        the model taken at the epoch with the highest validation accuracy
        (``None`` if no epoch scored above 0); ``best_acc`` is that accuracy
        as a float.
    """
    best_acc = 0.0
    best_model = None
    # Remember the caller's mode so it can be restored once training is done.
    was_training = model.training

    for epoch in tqdm(range(epoch_num)):
        # Training pass first, then validation pass.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            # Switch dropout / batch-norm behaviour for the phase; without
            # this the validation pass would run in training mode.
            model.train(is_train)

            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloader[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients are only tracked during the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        # Gradients accumulate, so clear them before backward.
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by the batch size.
                running_loss += loss.item() * inputs.size(0)
                # .item() keeps the counter a plain int so the accuracy below
                # is an ordinary float.
                running_corrects += torch.sum(preds == labels).item()

            sample_count = len(image_datasets[phase])
            epoch_loss = running_loss / sample_count
            epoch_acc = running_corrects / sample_count
            print('Epoch %d %s loss: %.3f acc: %.3f' % (epoch, phase, epoch_loss, epoch_acc))

            # Snapshot the model whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model = copy.deepcopy(model)

        scheduler.step()

    model.train(was_training)
    return best_model, best_acc

# 3.5使用torchvision微调模型

* 微调指使用预训练模型，包含以下三个操作
* 1. 替换输出层
* 2. 训练输出层
* 3. 训练输出层之前的层
* 这里首先先展示普通训练

In [None]:
import torch
import torchvision.models as models

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = models.resnet18(pretrained=True)

#   Replace the output layer with a fresh 2-class linear layer
num_features = model.fc.in_features
model.fc = torch.nn.Linear(num_features, 2)

model = model.to(device)

#   Cross-entropy loss
criterion = torch.nn.CrossEntropyLoss()
#   SGD over all parameters, so every layer is trained
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
#   Multiply the learning rate by 0.1 every 7 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

#   train_model returns (best_model, best_acc), in that order
best_model, best_acc = train_model(model, criterion, optimizer, scheduler, 5, device)

# 3.6观察模型预测结果

In [None]:
def visualize_model(model, num_images=6):
    """Plot validation images with the class predicted by ``model``.

    Batches are drawn from the module-level ``dataloader['val']`` and moved
    to the module-level ``device``. Images are laid out two per row and
    titled with the matching ``class_type`` entry. The model is switched to
    evaluation mode while predicting and put back in its previous mode
    afterwards, including when an error is raised.

    Args:
        model: network used for prediction.
        num_images: maximum number of images to show; nothing is drawn when
            it is zero or negative.
    """
    if num_images <= 0:
        return

    # Remember whether the model was training so the mode can be restored.
    was_training = model.training
    model.eval()

    # Round up so an odd image count still gets enough grid cells.
    num_rows = (num_images + 1) // 2
    images_so_far = 0
    plt.figure()

    try:
        with torch.no_grad():
            for inputs, _ in dataloader['val']:
                inputs = inputs.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    # Grid of num_rows x 2; images_so_far is the 1-based cell.
                    ax = plt.subplot(num_rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_type[preds[j]]))
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        return
    finally:
        # model.train(mode=False) is equivalent to model.eval().
        model.train(mode=was_training)

In [None]:
#   best_model must be the network returned as the first element of
#   train_model's (best_model, best_acc) result
visualize_model(best_model)

# 3.7固定模型的参数
* 微调/迁移/预训练模型，让输出层之外的层不参与训练，看作一个特征提取器，只训练输出层
* 做法是将这些层参数的requires_grad设置为False

In [None]:
import torch
import torchvision

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = torchvision.models.resnet18(pretrained=True)
#   Turn off gradients for every pretrained parameter
for param in model.parameters():
    param.requires_grad = False
#   Swap in a new 2-class output layer; it is created after the loop above,
#   so its parameters are not touched by the freeze
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
#   Only the parameters of the new output layer are handed to the optimizer
optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)
#   Multiply the learning rate by 0.1 every 7 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
#   Train for 5 epochs; keep the best-validation copy (first return value)
#   and discard the accuracy
model, _ = train_model(model, criterion, optimizer, scheduler, 5, device)

In [None]:
#   Plot validation predictions with the default of 6 images
visualize_model(model)

# 3.8使用tensorboard可视化训练过程
* tensorboard可以查看训练过程中训练集与验证集中的损失值、准确率变化情况
* 需在训练过程中加入如下代码

In [None]:
from torch.utils.tensorboard import SummaryWriter

def train_with_board(model, criterion, optimizer, scheduler, num_epochs, device):
    """Train ``model`` while logging per-epoch metrics to TensorBoard.

    Each epoch runs a training pass and then a validation pass over the
    module-level ``dataloader`` dict; ``image_datasets`` supplies the
    per-phase sample counts. After both passes the epoch's loss and accuracy
    are written with ``SummaryWriter.add_scalars`` under the tags ``loss``
    and ``acc``, each holding a ``train`` and a ``val`` value.

    Args:
        model: network to optimize; its weights are updated in place.
        criterion: loss function, called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        num_epochs: number of epochs to run.
        device: device each batch is moved to before the forward pass.

    Returns:
        tuple: ``(best_model, best_acc)``. ``best_model`` is a deep copy of
        the model taken at the epoch with the highest validation accuracy
        (``None`` if no epoch scored above 0); ``best_acc`` is that accuracy
        as a float.
    """
    best_acc = 0.0
    best_model = None
    # Remember the caller's mode so it can be restored afterwards.
    was_training = model.training

    writer = SummaryWriter()
    try:
        for epoch in tqdm(range(num_epochs)):
            # Metrics of the current epoch, keyed by phase name.
            losses = {}
            accs = {}

            for phase in ['train', 'val']:
                is_train = phase == 'train'
                # Switch dropout / batch-norm behaviour for the phase; without
                # this the validation pass would run in training mode.
                model.train(is_train)

                running_loss = 0.0
                running_corrects = 0
                for inputs, labels in dataloader[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # Gradients are only tracked during the training phase.
                    with torch.set_grad_enabled(is_train):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if is_train:
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()

                    # loss is a batch mean; weight it by the batch size.
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == labels).item()

                sample_count = len(image_datasets[phase])
                losses[phase] = running_loss / sample_count
                accs[phase] = running_corrects / sample_count

                print('Epoch %d %s loss: %.3f acc: %.3f' % (epoch, phase, losses[phase], accs[phase]))

                # Snapshot the model whenever validation accuracy improves.
                if phase == 'val' and accs[phase] > best_acc:
                    best_acc = accs[phase]
                    best_model = copy.deepcopy(model)

            writer.add_scalars('loss', {'train': losses['train'], 'val': losses['val']}, global_step=epoch)
            writer.add_scalars('acc', {'train': accs['train'], 'val': accs['val']}, global_step=epoch)

            scheduler.step()
    finally:
        # Close the writer even if training is interrupted.
        writer.close()
        model.train(was_training)

    return best_model, best_acc

In [None]:
import torch
import torchvision

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = torchvision.models.resnet18(pretrained=True)
#   Turn off gradients for every pretrained parameter
for param in model.parameters():
    param.requires_grad = False
#   Swap in a new 2-class output layer; it is created after the loop above,
#   so its parameters are not touched by the freeze
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 2)
model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
#   Only the parameters of the new output layer are handed to the optimizer
optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)
#   Multiply the learning rate by 0.1 every 7 epochs
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

In [None]:
#   Run 5 epochs for the TensorBoard logs only; both return values are discarded
_, _ = train_with_board(model, criterion, optimizer, scheduler, 5, device)

* notebook有点过于慢了

In [15]:
!tensorboard --logdir runs

^C
