In [6]:
import sys, os
sys.path.append("..")
from models import YOLOv1
from data.VOC_Dataset import VOC_Dataset
from common_utils import DATA_HOME

from ipdb import set_trace
from torch.utils.data import DataLoader
from multiprocessing import cpu_count
import random 
import torch
import pandas as pd
import numpy as np

# torch.multiprocessing.set_start_method("spawn", force=True)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

random.seed(1)
torch.manual_seed(1)

<torch._C.Generator at 0x247180510d0>

In [7]:
import platform
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

_voc_root = f"{DATA_HOME}/VOCdevkit/VOC2007"
voc_ds = VOC_Dataset(_voc_root)
train_indices, test_indices = train_test_split(list(range(len(voc_ds))), test_size=0.2, random_state=1)
train_set = Subset(voc_ds, train_indices)
test_set = Subset(voc_ds, test_indices)

BS = 2 # batch size
S = 7 # num of rows/cols
C = 20 # num of classes
B = 2 # num of bounding boxes
BOX = 5 # num of values for each bbox

lambda_coord = 5
lambda_noobj = 1

def inf_test_gen(data_loader):
    while True:
        for id, batch in enumerate(data_loader):
            yield id, batch
        

def collate_fn(data):
    # output: 7 * 7 * 30
    # S * S * ((x, y, w, h, confidence) * B=2 + C=20)
    # make img batch and label batch
    imgs, labels, classes = zip(*data)
    imgs = torch.tensor(np.array(imgs), dtype=torch.float16)
    stride = 1. / S
    label_mat = torch.zeros((len(imgs), BOX * B + C, S, S), dtype=torch.float16)
    for i in range(len(labels)):
        for j in range(len(labels[i])):
            x, y, _, _= labels[i][j]
            xi, yi = int(x // stride), int(y // stride)
            cell = label_mat[i, :, xi, yi]
            cell[:5] = cell[5:10] = torch.tensor([*labels[i][j], 1], dtype=torch.float16) # assign coords
            cell[BOX * B + classes[i][j]] = 1.0 # assign class
    return imgs, label_mat
    
if platform.system() == "Windows":
    train_loader = DataLoader(train_set, batch_size=BS, pin_memory=False, shuffle=True, collate_fn=collate_fn)
    test_loader = DataLoader(test_set, batch_size=1, pin_memory=False, shuffle=True, collate_fn=collate_fn)
    save_dir = os.path.expanduser("~/YOLO_MODELS")
else:
    train_loader = DataLoader(train_set, batch_size=BS, pin_memory=False, shuffle=True, num_workers=1, collate_fn=collate_fn)
    test_loader = DataLoader(test_set, batch_size=1, pin_memory=False, shuffle=True, num_workers=1, collate_fn=collate_fn)
    save_dir = os.path.expanduser(os.environ["YOLO_MODELS"])
test_loader = inf_test_gen(test_loader)

class dict:  {'aeroplane': 0, 'bicycle': 1, 'bird': 2, 'boat': 3, 'bottle': 4, 'bus': 5, 'car': 6, 'cat': 7, 'chair': 8, 'cow': 9, 'diningtable': 10, 'dog': 11, 'horse': 12, 'motorbike': 13, 'person': 14, 'pottedplant': 15, 'sheep': 16, 'sofa': 17, 'train': 18, 'tvmonitor': 19}


In [8]:
yolo = YOLOv1().to(device=device).half()

# Apply initialization to the model's weights
# for name, param in yolo.named_parameters():
#     if 'weight' in name:
#         torch.nn.init.kaiming_normal_(param)

In [9]:
from utils.metrics import IOU
from collections import defaultdict as dd

mse_loss = torch.nn.MSELoss(reduction="none")
def yolo_loss(yhat: torch.Tensor, y: torch.Tensor, loss_df: pd.DataFrame):
    """calcalate batch yolo loss, @param res_mat: (batch_size, B*5+C, S, S)"""
    loss1 = loss2 = loss3 = loss4 = loss5 = 0.
    # calculate loss for every bounding box in every cell
    loss1 = (y[:, 4, :, :] * (lambda_coord * mse_loss(yhat[:, :2, :, :], y[:, :2, :, :]) + mse_loss(yhat[:, 5:7, :, :], y[:, 5:7, :, :]))).sum()
    loss2 = (y[:, 4, :, :] * (lambda_coord * (yhat[:, 2:4, :, :].sqrt() - y[:, 2:4, :, :].sqrt()) ** 2 + (yhat[:, 7:9, :, :].sqrt() - y[:, 7:9, :, :].sqrt()) ** 2)).sum()
    loss3 = (y[:, 4, :, :] * mse_loss(yhat[:, 4, :, :], y[:, 4, :, :])).sum()
    loss4 = lambda_noobj * (yhat[:, 4, :, :][y[:, 4, :, :]==0] ** 2 + yhat[:, 9, :, :][y[:, 9, :, :]==0] ** 2).sum()
    obj_mask = (y[:, 4, :, :] == 1).unsqueeze(1).expand(-1, 20, -1, -1)
    loss5 = mse_loss(y[:, -C:, :, :][obj_mask], yhat[:, -C:, :, :][obj_mask]).sum()

    loss_df = pd.concat((loss_df, pd.DataFrame([[loss1.item(), loss2.item(), loss3.item(), loss4.item(), loss5.item()]], columns=loss_df.columns)), ignore_index=True)
    return loss1 + loss2 + loss3 + loss4 + loss5, loss_df
    

In [10]:
from sys import modules
import torch.optim as optim
import matplotlib.pyplot as plt
import time

# IO
train_loss_fname = "train_loss_latest.csv"
val_loss_fname = "val_loss_latest.csv"
train_loss_df = pd.DataFrame(columns=["l1", "l2", "l3", "l4", "l5"])
val_loss_df = pd.DataFrame(columns=["l1", "l2", "l3", "l4", "l5"])
print(f"model save_dir: {save_dir}")
print(f"train_set size: {len(train_set)}, val_set size: {len(test_set)}")
files = [f for f in os.listdir(save_dir) if ".pth" in f]
epoch = 0

if files:
    max_f = max(files)
    epoch = int(max_f.split('_')[1])
    checkpoint_pth = save_dir+"/"+max_f
    yolo.load_state_dict(torch.load(checkpoint_pth)["model"])
    print("loaded model checkpoint: ", checkpoint_pth)
    train_loss_df = pd.read_csv(f"{save_dir}/{train_loss_fname}", index_col=None)
    val_loss_df = pd.read_csv(f"{save_dir}/{val_loss_fname}", index_col=None)
    print("loaded model loss dataframe")

for param in yolo.features.parameters():
    param.requires_grad = False
optimizer = optim.SGD(yolo.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4)
torch.cuda.empty_cache()
# calculate training time
start_time = time.time()
while epoch < 100:
    for _id, (img, label) in enumerate(train_loader):
        if len(img) != len(label): continue # skip incomplete batch at last
        # clear gradient graph
        optimizer.zero_grad()

        # forward-propagate
        res = yolo(img.to(device=device))
        # print(res.shape, len(labels[2]), classes[2])
        train_loss, train_loss_df = yolo_loss(res, label.to(device), train_loss_df)
        # set_trace

        # back-propagate
        train_loss.backward()
        optimizer.step()

        # calculate validation errors
        _, (t_img, t_label) = next(test_loader)
        with torch.no_grad():
            res = yolo(t_img.to(device=device))
            val_loss, val_loss_df = yolo_loss(res, t_label.to(device), val_loss_df)
            
        # perform IO
        if epoch and epoch % 2 and _id == 1000:
            # save model 
            checkpoint = {
                'model': yolo.state_dict(),
                'optimizer': optimizer.state_dict(),
                'iteration': _id,
                'train_loss': train_loss,
            }
            torch.save(checkpoint, f"{save_dir}/yolov1_{epoch}_{_id}.pth")
            
            # show time elapse
            print(f"Epoch {epoch}, iteration: {_id}, train loss: {train_loss}, val loss: {val_loss}")
            train_loss_df.to_csv(f"{save_dir}/{train_loss_fname}")
            val_loss_df.to_csv(f"{save_dir}/{val_loss_fname}")
    epoch += 1
    
        # if _id == 2000:
            # set_trace()
            # end_time = time.time()
            # print("1000 iterations cost: {:.2f} seconds".format(end_time - start_time))
            # start_time = end_time
        # showing the image with labels
        # res_img = Image.fromarray((batch[0] * 255).permute(1, 2, 0).byte().numpy())
        # draw = ImageDraw.Draw(res_img)
        # for pc in labels[0]:
            # draw.rectangle((448*pc[0], 448*pc[1], 448*pc[2], 448*pc[3]), outline="red")
        # res_img.show()



model save_dir: C:\Users\sxing/YOLO_MODELS
train_set size: 4008, val_set size: 1003
Epoch 1, iteration: 1000, train loss: 20.8125, val loss: 12.3359375


KeyboardInterrupt: 