In [1]:
from models import YOLOv1
from data.VOC_Dataset import VOC_Dataset
from data import DATA_HOME

from ipdb import set_trace
from torch.utils.data import DataLoader
from numpy import array
from multiprocessing import cpu_count
import random 
import torch
import pandas as pd

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

random.seed(1)
torch.manual_seed(1)

<torch._C.Generator at 0x18d4d251070>

In [2]:
import platform
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

_voc_root = f"{DATA_HOME}/VOCdevkit/VOC2007"
voc_ds = VOC_Dataset(_voc_root)
train_indices, test_indices = train_test_split(list(range(len(voc_ds))), test_size=0.2, random_state=1)
train_set = Subset(voc_ds, train_indices)
test_set = Subset(voc_ds, test_indices)

BS = 2

def inf_test_gen(data_loader):
    while True:
        for id, batch in enumerate(data_loader):
            yield id, batch
        

def collate_fn(data):
    # output: 7 * 7 * 30
    # S * S * ((x, y, w, h, confidence) * B=2 + C=20)
    # make img batch and label batch
    imgs, labels, classes = zip(*data)
    # inhomegenous shape label, since each image has different number of objects
    # label's dimension: (Batch size, # of objects in each image, 4 coords)
    return torch.tensor(array(imgs), dtype=torch.float), labels, classes
    
if platform.system() == "Windows":
    train_loader = DataLoader(train_set, batch_size=BS, pin_memory=True, shuffle=True, collate_fn=collate_fn)
    test_loader = DataLoader(test_set, batch_size=BS, pin_memory=True, shuffle=True, collate_fn=collate_fn)
else:
    train_loader = DataLoader(train_set, batch_size=BS, pin_memory=True, shuffle=True, num_workers=4, collate_fn=collate_fn)
    test_loader = DataLoader(test_set, batch_size=BS, pin_memory=True, shuffle=True, num_workers=4, collate_fn=collate_fn)
test_loader = inf_test_gen(test_loader)

class dict:  {'bicycle': 0, 'train': 1, 'dog': 2, 'pottedplant': 3, 'horse': 4, 'cow': 5, 'person': 6, 'bus': 7, 'cat': 8, 'boat': 9, 'chair': 10, 'car': 11, 'bottle': 12, 'sofa': 13, 'tvmonitor': 14, 'aeroplane': 15, 'motorbike': 16, 'sheep': 17, 'diningtable': 18, 'bird': 19}


In [3]:

yolo = YOLOv1().to(device=device)

S = 7 # num of rows/cols
C = 20 # num of classes
B = 2 # num of bounding boxes

lamba_coord = 5
lamba_noobj = 0.5

In [4]:
from utils.metrics import IOU
from collections import defaultdict as dd

def yolo_loss(res_mat: torch.tensor, label_mat: list, class_mat: list, loss_df: pd.DataFrame):
    """calcalate batch yolo loss, @param res_mat: (batch_size, B*5+C, S, S)"""
    loss1 = torch.tensor(0.).to(device=device)
    loss2 = torch.tensor(0.).to(device=device)
    loss3 = torch.tensor(0.).to(device=device)
    loss4 = torch.tensor(0.).to(device=device)
    loss5 = torch.tensor(0.).to(device=device)
    # calculate loss for every bounding box in every cell
    for b, batch in enumerate(res_mat):
        # assign labels bbox to cell indices
        stride = 1. / S
        label_inds = dd(list)
        label_class = {}
        for idx, (x, y, w, h) in enumerate(label_mat[b]):
            xi, yi = int(x // stride), int(y // stride)
            label_inds[(xi, yi)].append((x, y, w, h))
            label_class[(x, y, w, h)] = class_mat[b][idx]
        
        # print("label class", label_class, b, batch.shape)
        # iterate cell and calculate loss
        for i in range(batch.shape[1]):
            for j in range(batch.shape[2]):
                cell = batch[:, i, j]

                if (i, j) in label_inds:
                    yprobs = torch.tensor([0.] * C).to(device=device)
                    for x, y, w, h in label_inds[i, j]:
                        for k in range(0, B*5, 5):
                            x_, y_, w_, h_, c_ = cell[k:k+5]
                            loss1 += lamba_coord * ((x-x_) ** 2 + (y-y_) ** 2) # yolo loss term 1
                            loss2 += lamba_coord * ((w - w_.sqrt())**2 + (h ** 0.5 - h_.sqrt())**2) # yolo loss term 2
                            loss3 += (IOU((x, y, w, h), (x_, y_, w_, h_)) - c_) ** 2 # yolo loss term 3
                        yprobs[label_class[x, y, w, h]] = 1.  
                    loss5 += ((yprobs - cell[-C:]) ** 2).sum() # yolo loss term 5
                else:
                    for k in range(4, B*5, 5):
                        _c = cell[k]
                        loss4 += _c ** 2 * lamba_noobj # yolo loss term 4
        # print("xywh: " , x_, y_, w_, h_, c_)
        # print("label: ", x, y, w, h)
    loss_df = pd.concat((loss_df, pd.DataFrame([[loss1.item(), loss2.item(), loss3.item(), loss4.item(), loss5.item()]], columns=loss_df.columns)), ignore_index=True)
    return loss1 + loss2 + loss3 + loss4 + loss5, loss_df
    # print("ret: ", res_mat.shape)
    

In [7]:
from sys import modules
import os
import torch.optim as optim
import matplotlib.pyplot as plt
import time

save_dir = os.path.expanduser(os.environ["YOLO_MODELS"])

optimizer = optim.Adam(yolo.parameters(), lr=1e-4)
torch.cuda.empty_cache()
train_loss_df = pd.DataFrame(columns=["l1", "l2", "l3", "l4", "l5"])
val_loss_df = pd.DataFrame(columns=["l1", "l2", "l3", "l4", "l5"])
print(f"model save_dir: {save_dir}")
print(f"train_set size: {len(train_set)}, val_set size: {len(test_set)}")
# calculate training time
start_time = time.time()
for epoch in range(100):
    for _id, (batch, labels, classes) in enumerate(train_loader):
        # clear gradient graph
        optimizer.zero_grad()

        # forward-propagate
        res = yolo(batch.to(device=device))
        # print(res.shape, len(labels[2]), classes[2])
        train_loss, train_loss_df = yolo_loss(res, labels, classes, train_loss_df)
        # set_trace

        # back-propagate
        train_loss.backward()
        optimizer.step()

        # calculate validation errors
        _, (batch, labels, classes) = next(test_loader)
        with torch.no_grad():
            res = yolo(batch.to(device=device))
            val_loss, val_loss_df = yolo_loss(res, labels, classes, val_loss_df)
            
        # record losses
        if epoch and epoch % 2 == 0 and _id == 0:

            checkpoint = {
                'model': yolo.state_dict(),
                'optimizer': optimizer.state_dict(),
                'iteration': _id,
                'train_loss': train_loss,
            }
            torch.save(checkpoint, f"{save_dir}/yolov1_{epoch}_{_id}.pth")
        
        # show time elapse
        if _id % 1000 == 0:
            # set_trace()
            print(f"Epoch {epoch}, iteration: {_id}, train loss: {train_loss}, val loss: {val_loss}")
            train_loss_df.to_csv(f"{save_dir}/train_loss_latest.csv")
            val_loss_df.to_csv(f"{save_dir}/val_loss_latest.csv")
            # end_time = time.time()
            # print("1000 iterations cost: {:.2f} seconds".format(end_time - start_time))
            # start_time = end_time
        # showing the image with labels
        # res_img = Image.fromarray((batch[0] * 255).permute(1, 2, 0).byte().numpy())
        # draw = ImageDraw.Draw(res_img)
        # for pc in labels[0]:
            # draw.rectangle((448*pc[0], 448*pc[1], 448*pc[2], 448*pc[3]), outline="red")
        # res_img.show()

    
    

model save_dir: C:\Users\sxing/YOLO_MODELS
train_set size: 4008, val_set size: 1003
> [1;32mc:\users\sxing\appdata\local\temp\ipykernel_13500\1547203702.py[0m(52)[0;36m<module>[1;34m()[0m

Epoch 0, iteration: 0, train loss: 55.62926483154297, val loss: 62.0623779296875
> [1;32mc:\users\sxing\appdata\local\temp\ipykernel_13500\1547203702.py[0m(52)[0;36m<module>[1;34m()[0m

tensor([[[[0.0458, 0.0325, 0.0515,  ..., 0.1471, 0.1056, 0.1711],
          [0.2044, 0.1750, 0.2352,  ..., 0.1821, 0.0569, 0.1141],
          [0.4751, 0.3372, 0.3488,  ..., 0.4909, 0.4191, 0.6553],
          ...,
          [0.6478, 0.6511, 0.5851,  ..., 0.6154, 0.4909, 0.6192],
          [0.8010, 0.8101, 0.7809,  ..., 0.8501, 0.6236, 0.7337],
          [0.8816, 0.9034, 0.9225,  ..., 0.8959, 0.9144, 0.8300]],

         [[0.0436, 0.2573, 0.3342,  ..., 0.5766, 0.6551, 0.8767],
          [0.0546, 0.2850, 0.3878,  ..., 0.5039, 0.8016, 0.8357],
          [0.0588, 0.2662, 0.3031,  ..., 0.6978, 0.8252, 0.8003],
    