In [1]:
import os
import time
import copy
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from nets.yolo4 import YoloBody
from nets.yolo_training import YOLOLoss, Generator

In [2]:
#---------------------------------------------------#
#   获得类和先验框
#---------------------------------------------------#
def get_classes(classes_path):
    '''loads the classes'''
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names


def get_anchors(anchors_path):
    '''loads the anchors from a file'''
    with open(anchors_path) as f:
        anchors = f.readline()
    anchors = [float(x) for x in anchors.split(',')]
    return np.array(anchors).reshape([-1,3,2])[::-1,:,:]


#---------------------------------------------------#
#   训练一个epoch
#---------------------------------------------------#
def fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen,genval, Epoch, cuda, optimizer, lr_scheduler):
    total_loss = 0
    val_loss = 0
    print('\n' + '-' * 10 + 'Train one epoch.' + '-' * 10)
    print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
    print('Start Training.')
    net.train()
    for iteration in range(epoch_size):
        start_time = time.time()
        images, targets = next(gen)
        with torch.no_grad():
            if cuda:
                images = Variable(torch.from_numpy(images).type(torch.FloatTensor)).cuda()
                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
            else:
                images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
                targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
        optimizer.zero_grad()
        outputs = net(images)
        losses = []
        for i in range(3):
            loss_item = yolo_losses[i](outputs[i], targets)
            losses.append(loss_item[0])
        loss = sum(losses)
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        total_loss += loss
        waste_time = time.time() - start_time
        if iteration == 0 or (iteration+1) % 10 == 0:
            print('step:' + str(iteration+1) + '/' + str(epoch_size) + ' || Total Loss: %.4f || %.4fs/step' % (total_loss/(iteration+1), waste_time))
    print('Finish Training.')
    '''        
    print('Start Validation.')
    net.eval()
    for iteration in range(epoch_size_val):
        images_val, targets_val = next(genval)

        with torch.no_grad():
            if cuda:
                images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor)).cuda()
                targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
            else:
                images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor))
                targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
            optimizer.zero_grad()
            outputs = net(images_val)
            losses = []
            for i in range(3):
                loss_item = yolo_losses[i](outputs[i], targets_val)
                losses.append(loss_item[0])
            loss = sum(losses)
            val_loss += loss
    print('Finish Validation')
    '''
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1), val_loss/(epoch_size_val+1)))
    
    return total_loss/(epoch_size+1), val_loss/(epoch_size_val+1)

In [3]:
#-------------------------------#
#   输入的shape大小
#   显存比较小可以使用416x416
#   显存比较大可以使用608x608
#-------------------------------#
#input_shape = (416,416)
input_shape = (608, 608)

#-------------------------------#
#   tricks的使用设置
#-------------------------------#
Cosine_lr = True
mosaic = True
# 用于设定是否使用cuda
Cuda = True
smoooth_label = 0.03

#-------------------------------#
#   获得训练集和验证集的annotations
#   
#-------------------------------#
train_annotation_path = 'model_data/train.txt'
val_annotation_path = 'model_data/val.txt'

#-------------------------------#
#   获得先验框和类
#-------------------------------#
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/mask_classes.txt'   
class_names = get_classes(classes_path)
anchors = get_anchors(anchors_path)
num_classes = len(class_names)

In [4]:
# 创建模型
model = YoloBody(len(anchors[0]), num_classes)
model_path = "model_data/yolov4_maskdetect_weights0.pth"
# model_path = "model_data/yolov4_maskdetect_weights0.pth"
# 加快模型训练的效率
print('Loading pretrained model weights.')
model_dict = model.state_dict()
pretrained_dict = torch.load(model_path)
pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')

if Cuda:
    net = torch.nn.DataParallel(model)
    cudnn.benchmark = True
    net = net.cuda()

# 建立loss函数
yolo_losses = []
for i in range(3):
    yolo_losses.append(YOLOLoss(np.reshape(anchors, [-1,2]), num_classes, \
                                (input_shape[1], input_shape[0]), smoooth_label, Cuda))
# read train lines and val lines
with open(train_annotation_path) as f:
    train_lines = f.readlines()
with open(val_annotation_path) as f:
    val_lines = f.readlines()
num_train = len(train_lines)
num_val = len(val_lines)

Loading pretrained model weights.
Finished!


In [5]:
#------------------------------------#
#   先冻结backbone训练
#------------------------------------#
lr = 1e-3
Batch_size = 4
Init_Epoch = 0
Freeze_Epoch = 25
        
optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)
                        
epoch_size = int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
for param in model.backbone.parameters():
    param.requires_grad = False

best_loss = 99999999.0
best_model_weights = copy.deepcopy(net.state_dict())
for epoch in range(Init_Epoch, Freeze_Epoch):
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val, 
                                         Freeze_Epoch, Cuda, optimizer, lr_scheduler)
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
    with open('total_loss.csv', mode='a+') as total_loss_file:
        total_loss_file.write(str(total_loss.item()) + '\n')
    #with open('val_loss.csv', mode='a+') as val_loss_file:
    #    val_loss_file.write(str(val_loss.item()) + '\n')
torch.save(best_model_weights, 'model_data/yolov4_maskdetect_weights0.pth')


----------Train one epoch.----------
Epoch:1/25
Start Training.


  tmp_targets = np.array(targets)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


step:1/80 || Total Loss: 15015.4355 || 6.3311s/step
step:10/80 || Total Loss: 11414.8857 || 1.5895s/step
step:20/80 || Total Loss: 8910.9219 || 1.4485s/step
step:30/80 || Total Loss: 7188.3159 || 1.2213s/step
step:40/80 || Total Loss: 5974.7778 || 2.8243s/step
step:50/80 || Total Loss: 5096.2876 || 1.5680s/step
step:60/80 || Total Loss: 4436.5747 || 1.4322s/step
step:70/80 || Total Loss: 3930.0063 || 1.5138s/step
step:80/80 || Total Loss: 3527.0259 || 1.3748s/step
Finish Training.
Total Loss: 3483.4822 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:2/25
Start Training.
step:1/80 || Total Loss: 648.2054 || 1.1680s/step
step:10/80 || Total Loss: 589.9666 || 1.5796s/step
step:20/80 || Total Loss: 559.1953 || 1.5327s/step
step:30/80 || Total Loss: 521.5851 || 1.4069s/step
step:40/80 || Total Loss: 494.2169 || 1.3941s/step
step:50/80 || Total Loss: 463.7503 || 1.4807s/step
step:60/80 || Total Loss: 437.4377 || 1.3825s/step
step:70/80 || Total Loss: 413.6038 || 1.8304s/step

In [6]:
#------------------------------------#
#   解冻backbone后训练
#------------------------------------#
lr = 1e-4
Batch_size = 2
Freeze_Epoch = 25
Unfreeze_Epoch = 40

optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)
                        
epoch_size = int(max(1, num_train//Batch_size//2.5)) if mosaic else max(1, num_train//Batch_size)
epoch_size_val = num_val//Batch_size
for param in model.backbone.parameters():
    param.requires_grad = True

best_loss = 21.091846466064453
    
for epoch in range(Freeze_Epoch, Unfreeze_Epoch):
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val, 
                                         Unfreeze_Epoch, Cuda, optimizer, lr_scheduler)
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
    with open('total_loss.csv', mode='a+') as total_loss_file:
        total_loss_file.write(str(total_loss.item()) + '\n')
    #with open('val_loss.csv', mode='a+') as val_loss_file:
    #    val_loss_file.write(str(val_loss.item() + '\n')
torch.save(best_model_weights, 'model_data/yolov4_maskdetect_weights1.pth')


----------Train one epoch.----------
Epoch:26/40
Start Training.
step:1/160 || Total Loss: 8.7125 || 1.0300s/step
step:10/160 || Total Loss: 10.7137 || 0.8823s/step
step:20/160 || Total Loss: 17.1531 || 1.3484s/step
step:30/160 || Total Loss: 14.8680 || 0.8271s/step
step:40/160 || Total Loss: 17.0336 || 0.7746s/step
step:50/160 || Total Loss: 16.2276 || 1.0037s/step
step:60/160 || Total Loss: 17.0040 || 0.8280s/step
step:70/160 || Total Loss: 18.9229 || 0.8763s/step
step:80/160 || Total Loss: 19.3642 || 0.7406s/step
step:90/160 || Total Loss: 18.4085 || 0.9709s/step
step:100/160 || Total Loss: 17.4130 || 0.8443s/step
step:110/160 || Total Loss: 16.4845 || 0.8390s/step
step:120/160 || Total Loss: 16.2463 || 0.8858s/step
step:130/160 || Total Loss: 15.9496 || 0.7082s/step
step:140/160 || Total Loss: 16.2377 || 0.8815s/step
step:150/160 || Total Loss: 16.0274 || 0.8802s/step
step:160/160 || Total Loss: 16.2330 || 1.1385s/step
Finish Training.
Total Loss: 16.1321 || Val Loss: 0.0000 

---