**Train YOLO model**

Original Author: Bubbliiiing

Edited / Rewritten / Commented By: Hammond Liu (hl3797)

GitHub Link: [yolov4-pytorch](https://github.com/bubbliiiing/yolov4-pytorch)


In [None]:
# Mount Google Drive so the project files stored there can be used for
# training on Colab. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
# After this call the Drive contents are reachable under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Switch to the working path of this project
from os import chdir, listdir

# Show the contents of the current directory before switching.
print(listdir('./'))

# Make the project folder on the mounted Drive the working directory, so the
# relative paths used in later cells (e.g. './data/...') resolve against it.
chdir('/content/drive/MyDrive/Sem_3/ML/F20_ML_final_project/yolo/')

# Show the contents again to confirm the project files are visible.
print(listdir('./'))

['.config', 'drive', 'sample_data']
['utils', 'yolo.py', 'nets', 'data', 'yolo_ann.ipynb', 'train.ipynb']


In [None]:
import os
import time
import copy
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from nets.yolo4 import YoloBody
from nets.yolo_training import YOLOLoss, Generator

In [None]:
# Read the format data info from input files
def get_classes(classes_path):
    '''Load the class names, one per line, from a text file.

    Args:
        classes_path: Path to a text file listing one class name per line.

    Returns:
        A list of class-name strings with surrounding whitespace stripped.
        Blank (or whitespace-only) lines are skipped, so a stray empty line
        does not add an empty class name and inflate the class count.
    '''
    with open(classes_path) as f:
        # Strip every line, then keep only the ones that are non-empty.
        return [name for name in (line.strip() for line in f) if name]

def get_anchors(anchors_path):
    '''Read the anchors from the first line of a comma-separated text file.

    The numbers are arranged into an array of shape (groups, 3, 2) and the
    order of the groups is reversed before returning.
    '''
    with open(anchors_path) as handle:
        first_line = handle.readline()
    values = np.array(list(map(float, first_line.split(','))))
    grouped = values.reshape([-1, 3, 2])
    # Reverse along the first (group) axis only.
    return grouped[::-1, :, :]

# Locations of the class-name list and the anchor definitions
classes_path = './data/classes.txt'
anchors_path = './data/anchors.txt'

# Parse both files; the number of classes follows from the class list
anchors = get_anchors(anchors_path)
class_names = get_classes(classes_path)
num_classes = len(class_names)

In [None]:
# Train one epoch
def fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda, optimizer, lr_scheduler):
    '''Train `net` for one epoch and return the mean losses.

    Args:
        net: Model to train; called as ``net(images)``. Its first three
            outputs are fed to the three loss callables.
        yolo_losses: Sequence of three loss callables. Each is called as
            ``yolo_losses[i](outputs[i], targets)`` and the first element of
            its return value is used as the loss of that output.
        epoch: Zero-based index of the current epoch (logging only).
        epoch_size: Number of training batches drawn from `gen`.
        epoch_size_val: Number of validation batches. Validation is currently
            disabled, so this is only the divisor of the (zero) val loss.
        gen: Iterator yielding ``(images, targets)`` as NumPy arrays.
        genval: Validation iterator; unused while validation is disabled.
        Epoch: Total number of epochs (logging only).
        cuda: If true, the image batch is moved to the GPU.
        optimizer: Optimizer, stepped once per batch.
        lr_scheduler: Learning-rate scheduler, stepped once per epoch.

    Returns:
        ``(mean_train_loss, mean_val_loss)``. The training loss is a detached
        0-dim tensor (so ``.item()`` works); the validation loss is 0 while
        validation is disabled.
    '''
    total_loss = 0
    val_loss = 0
    print('\n' + '-' * 10 + 'Train one epoch.' + '-' * 10)
    print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
    print('Start Training.')
    net.train()
    for iteration in range(epoch_size):
        start_time = time.time()
        images, targets = next(gen)
        # Tensors built from NumPy never require grad, so neither the
        # deprecated Variable wrapper nor a no_grad context is needed here.
        images = torch.from_numpy(images).type(torch.FloatTensor)
        if cuda:
            images = images.cuda()
        # Targets stay on the CPU, exactly as before.
        targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets]

        optimizer.zero_grad()
        outputs = net(images)
        loss = sum(yolo_losses[i](outputs[i], targets)[0] for i in range(3))
        loss.backward()
        optimizer.step()

        # Accumulate a detached copy: adding `loss` itself would keep every
        # batch's autograd graph alive and leak memory over the epoch.
        total_loss += loss.detach()
        waste_time = time.time() - start_time
        if iteration == 0 or (iteration+1) % 10 == 0:
            print('step:' + str(iteration+1) + '/' + str(epoch_size) + ' || Total Loss: %.4f || %.4fs/step' % (total_loss/(iteration+1), waste_time))
    # The schedulers used with this function (CosineAnnealingLR with T_max=5,
    # StepLR with step_size=1) are parameterised in epochs, so advance the
    # schedule once per epoch rather than once per batch.
    lr_scheduler.step()
    print('Finish Training.')

    # Validation is currently disabled, so val_loss stays 0. To re-enable:
    # print('Start Validation.')
    # net.eval()
    # with torch.no_grad():
    #     for iteration in range(epoch_size_val):
    #         images_val, targets_val = next(genval)
    #         images_val = torch.from_numpy(images_val).type(torch.FloatTensor)
    #         if cuda:
    #             images_val = images_val.cuda()
    #         targets_val = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets_val]
    #         outputs = net(images_val)
    #         loss = sum(yolo_losses[i](outputs[i], targets_val)[0] for i in range(3))
    #         val_loss += loss
    # print('Finish Validation')

    # Average over the number of batches actually processed; max(1, ...)
    # only guards against division by zero when a split has no batches.
    mean_train_loss = total_loss / max(1, epoch_size)
    mean_val_loss = val_loss / max(1, epoch_size_val)
    print('Total Loss: %.4f || Val Loss: %.4f ' % (mean_train_loss, mean_val_loss))

    return mean_train_loss, mean_val_loss

In [None]:
# Training configuration

# Annotation lists for the training and validation splits
train_annotation_path, val_annotation_path = './data/train_data.txt', './data/val_data.txt'

# Network input size; alternative setting: input_shape = (608, 608)
input_shape = (416, 416)

# Run on the GPU
Cuda = True

# Switches for [mosaic data augment] and [cosine lr decay]
mosaic = True
Cosine_lr = True

# Value handed to YOLOLoss (presumably the label-smoothing factor - confirm in nets/yolo_training)
smoooth_label = 0.03

In [None]:
# Create the model & Load pretrained weights
model = YoloBody(len(anchors[0]), num_classes)

# model_path = "./data/yolov4_coco_pretrained_weights.pth"
model_path = "./data/yolov4_maskdetect_weights1.pth"
print('Loading pretrained model weights.')
model_dict = model.state_dict()
# Load onto the CPU so a checkpoint saved from a GPU can also be read on a
# CPU-only machine; the model is moved to the GPU below when Cuda is set.
pretrained_dict = torch.load(model_path, map_location='cpu')
# Keep only the entries that exist in this model with a matching shape.
# The membership test avoids a KeyError for checkpoint keys the model lacks.
pretrained_dict = {k: v for k, v in pretrained_dict.items()
                   if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
print('Finished!')

# `net` is the object that gets trained. Default to the bare model so the
# later cells also work when Cuda is False; wrap it for GPU training otherwise.
net = model
if Cuda:
    net = torch.nn.DataParallel(model)
    cudnn.benchmark = True
    net = net.cuda()

# Set up YOLO loss function: one (identically configured) loss object per
# network output. The input size is passed as (input_shape[1], input_shape[0]).
yolo_losses = [
    YOLOLoss(np.reshape(anchors, [-1, 2]), num_classes,
             (input_shape[1], input_shape[0]), smoooth_label, Cuda)
    for i in range(3)
]

# Read train & val data (one annotation line per sample)
with open(train_annotation_path) as f:
    train_lines = f.readlines()
with open(val_annotation_path) as f:
    val_lines = f.readlines()
num_train = len(train_lines)
num_val = len(val_lines)

Loading pretrained model weights.
Finished!


In [None]:
# Freeze the backbone and train 25 epochs
lr = 1e-3
Batch_size = 4
Init_Epoch = 0
Freeze_Epoch = 25

# Use Adam optimization & Apply cosine annealing lr
optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
if Cosine_lr:
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)

# Apply mosaic data augment (training generator only; validation never uses it)
gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)

# Compute epoch size & Set requires_grad
epoch_size = int(max(1, num_train // Batch_size // 2.5)) if mosaic else max(1, num_train // Batch_size)
epoch_size_val = num_val // Batch_size
# Freeze the backbone: its parameters receive no gradients in this stage.
for param in model.backbone.parameters():
    param.requires_grad = False

# Training
# Start from +inf so the first epoch always becomes the initial best.
best_loss = float('inf')
# Snapshot `model` (not the DataParallel wrapper `net`) so the keys match the
# snapshots taken inside the loop and carry no 'module.' prefix.
best_model_weights = copy.deepcopy(model.state_dict())
for epoch in range(Init_Epoch, Freeze_Epoch):
    # Fit one epoch
    total_loss, val_loss = fit_one_epoch(net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val,
                                         Freeze_Epoch, Cuda, optimizer, lr_scheduler)
    # Update the best loss if needed (selection is by training loss)
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
    # Dump the loss data (appended, one value per epoch)
    with open('total_loss.csv', mode='a+') as total_loss_file:
        total_loss_file.write(str(total_loss.item()) + '\n')
    # with open('val_loss.csv', mode='a+') as val_loss_file:
    #     val_loss_file.write(str(val_loss.item()) + '\n')

# Save the best weights (lowest training loss) found during the frozen stage
torch.save(best_model_weights, './data/yolov4_maskdetect_weights_test.pth')


----------Train one epoch.----------
Epoch:1/25
Start Training.
step:1/46 || Total Loss: 7342.2622 || 10.6964s/step
step:10/46 || Total Loss: 5057.4844 || 2.9515s/step
step:20/46 || Total Loss: 3672.5044 || 3.0325s/step
step:30/46 || Total Loss: 2847.1348 || 2.3049s/step
step:40/46 || Total Loss: 2323.5850 || 2.4431s/step
Finish Training.
Total Loss: 2047.7594 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:2/25
Start Training.
step:1/46 || Total Loss: 514.1393 || 2.4384s/step
step:10/46 || Total Loss: 470.6038 || 2.8944s/step
step:20/46 || Total Loss: 424.4568 || 2.7061s/step
step:30/46 || Total Loss: 390.1014 || 2.5794s/step
step:40/46 || Total Loss: 361.4504 || 2.6532s/step
Finish Training.
Total Loss: 340.6440 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:3/25
Start Training.
step:1/46 || Total Loss: 223.5703 || 2.6569s/step
step:10/46 || Total Loss: 216.0468 || 2.5312s/step
step:20/46 || Total Loss: 209.2867 || 2.7817s/step
step:30/46 || Total 

In [None]:
# Second stage: unfreeze the backbone and train epochs 25-50.
# This cell continues from the previous one: it reuses `best_loss` and
# `best_model_weights` defined there.
Freeze_Epoch = 25
Unfreeze_Epoch = 50
Batch_size = 2
lr = 1e-4

# Make every backbone parameter trainable again
for param in model.backbone.parameters():
    param.requires_grad = True

# Fresh Adam optimizer with either cosine annealing or step decay
optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
lr_scheduler = (
    optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-5)
    if Cosine_lr
    else optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
)

# Batch generators; mosaic augmentation applies to the training data only
gen = Generator(Batch_size, train_lines, (input_shape[0], input_shape[1])).generate(mosaic = mosaic)
gen_val = Generator(Batch_size, val_lines, (input_shape[0], input_shape[1])).generate(mosaic = False)

# Number of batches per epoch for each split
if mosaic:
    epoch_size = int(max(1, num_train // Batch_size // 2.5))
else:
    epoch_size = max(1, num_train // Batch_size)
epoch_size_val = num_val // Batch_size

# Training
for epoch in range(Freeze_Epoch, Unfreeze_Epoch):
    # Run a single epoch
    total_loss, val_loss = fit_one_epoch(
        net, yolo_losses, epoch, epoch_size, epoch_size_val, gen, gen_val,
        Unfreeze_Epoch, Cuda, optimizer, lr_scheduler,
    )
    # Remember the weights of the epoch with the lowest training loss so far
    if total_loss < best_loss:
        best_loss = total_loss
        best_model_weights = copy.deepcopy(model.state_dict())
    # Append this epoch's loss to the log file
    with open('total_loss.csv', mode='a+') as total_loss_file:
        total_loss_file.write(str(total_loss.item()) + '\n')
    # with open('val_loss.csv', mode='a+') as val_loss_file:
    #     val_loss_file.write(str(val_loss.item()) + '\n')

# Write out the best weights seen across both stages
torch.save(best_model_weights, './data/yolov4_maskdetect_weights_test_.pth')


----------Train one epoch.----------
Epoch:26/50
Start Training.
step:1/94 || Total Loss: 40.2553 || 0.6579s/step
step:10/94 || Total Loss: 17.9560 || 0.5680s/step
step:20/94 || Total Loss: 18.6807 || 0.4828s/step
step:30/94 || Total Loss: 20.3428 || 0.4913s/step
step:40/94 || Total Loss: 18.8505 || 0.5542s/step
step:50/94 || Total Loss: 17.7775 || 0.5225s/step
step:60/94 || Total Loss: 16.9284 || 0.4939s/step
step:70/94 || Total Loss: 16.8763 || 0.5276s/step
step:80/94 || Total Loss: 16.6902 || 0.4967s/step
step:90/94 || Total Loss: 16.2073 || 0.5372s/step
Finish Training.
Total Loss: 15.8286 || Val Loss: 0.0000 

----------Train one epoch.----------
Epoch:27/50
Start Training.
step:1/94 || Total Loss: 12.2534 || 0.5120s/step
step:10/94 || Total Loss: 11.4258 || 0.5160s/step
step:20/94 || Total Loss: 13.4062 || 0.5503s/step
step:30/94 || Total Loss: 14.2555 || 0.5935s/step
step:40/94 || Total Loss: 13.8369 || 0.4738s/step
step:50/94 || Total Loss: 13.4154 || 0.4857s/step
step:60/94 |