In [6]:
import sys, os
sys.path.append("..")
from yolo.models import YOLOv1
from yolo.data.VOC_Dataset import VOC_Dataset
from data import DATA_HOME

from ipdb import set_trace
from torch.utils.data import DataLoader
from numpy import array
from multiprocessing import cpu_count
import random 
import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

random.seed(1)
torch.manual_seed(1)

<torch._C.Generator at 0x1ef9422a8b0>

In [7]:
import platform

_voc_root = f"{DATA_HOME}\VOCdevkit\VOC2007"
voc_ds = VOC_Dataset(_voc_root)

def collate_fn(data):
    # output: 7 * 7 * 30
    # S * S * ((x, y, w, h, confidence) * B=2 + C=20)
    # make img batch and label batch
    imgs, labels, classes = zip(*data)
    # inhomegenous shape label, since each image has different number of objects
    # label's dimension: (Batch size, # of objects in each image, 4 coords)
    return torch.tensor(array(imgs), dtype=torch.float), labels, classes
    
if platform.system() == "Windows":
    loader = DataLoader(voc_ds, batch_size=4, pin_memory=True, shuffle=True, collate_fn=collate_fn)
else:
    loader = DataLoader(voc_ds, batch_size=4, pin_memory=True, shuffle=True, num_workers=4, collate_fn=collate_fn)

class dict:  {'bus': 0, 'chair': 1, 'person': 2, 'sofa': 3, 'diningtable': 4, 'motorbike': 5, 'aeroplane': 6, 'dog': 7, 'bird': 8, 'train': 9, 'horse': 10, 'car': 11, 'cow': 12, 'bicycle': 13, 'sheep': 14, 'pottedplant': 15, 'boat': 16, 'tvmonitor': 17, 'bottle': 18, 'cat': 19}


In [8]:

test = torch.randn(3, 448, 448).unsqueeze(0).to(device=device)
yolo = YOLOv1().to(device=device)

S = 7 # num of rows/cols
C = 20 # num of classes
B = 2 # num of bounding boxes

lamba_coord = 5
lamba_noobj = 0.5


def IOU(output, label):
    """Calculate the intersection over union of two sets rectangles

    Keyword arguments
    both output and label are (x, y, w, h)
    """
    
    output = (
        output[0] - output[2] / 2,
        output[1] - output[3] / 2,
        output[0] + output[2] / 2,
        output[1] + output[3] / 2,
    )

    label = (
        label[0] - label[2] / 2,
        label[1] - label[3] / 2,
        label[0] + label[2] / 2,
        label[1] + label[3] / 2,
    )

    x_inter = min(output[2], label[2]) - max(output[0], label[0])
    y_inter = min(output[3], label[3]) - max(output[1], label[1])

    if x_inter <= 0.0 or y_inter <= 0.0:
        return 0.0

    intersection = x_inter * y_inter

    overlapped_union = (output[2] - output[0]) * (output[3] - output[1]) + (label[2] - label[0]) * (
        label[3] - label[1]
    )
    
    return intersection / (overlapped_union - intersection)

# sanity checks of IOU
coords = (0.25, 0.25, 0.5, 0.5)
y_coords1 = (0.5, 0.575, 0.5, 0.35)
y_coords2 = (0.575, 0.5, 0.35, 0.5)
y_coords3 = (0.25, 0.25, 0.5, 0.5)
y_coords4 = (0.25, 0, 0.5, 0)
y_coords5 = (0.625, 0.375, 0.25, 0.75)
y_coords6 = (0.2, 0.25, 0.2, 0.3)

def float_eqs(a, b, decimal_pt):
    eps = 10 ** (-decimal_pt)
    return abs(a-b) < eps

assert float_eqs(IOU(coords, y_coords1), 0.025 / (0.5*0.5 + 0.5*0.35 - 0.025), 5)
assert float_eqs(IOU(coords, y_coords2), 0.025 / (0.5*0.5 + 0.5*0.35 - 0.025), 5)
assert float_eqs(IOU(coords, y_coords3), 1, 5)
assert float_eqs(IOU(coords, y_coords4), 0, 5)
assert float_eqs(IOU(coords, y_coords5), 0, 5)
assert float_eqs(IOU(coords, y_coords6), 0.06 / (0.5*0.5), 5)

In [36]:
from collections import defaultdict as dd

def yolo_loss(res_mat: torch.tensor, label_mat: list, class_mat: list):
    """calcalate batch yolo loss, @param res_mat: (batch_size, B*5+C, S, S)"""
    loss = torch.tensor(0.).to(device=device)
    
    # calculate loss for every bounding box in every cell
    for b, batch in enumerate(res_mat):
        # assign labels bbox to cell indices
        stride = 1. / S
        label_inds = dd(list)
        label_class = {}
        for idx, (x, y, w, h) in enumerate(label_mat[b]):
            xi, yi = int(x // stride), int(y // stride)
            label_inds[(xi, yi)].append((x, y, w, h))
            label_class[(x, y, w, h)] = class_mat[b][idx]
        
        print("label inds", label_inds, b, batch.shape)
        # iterate cell and calculate loss
        for i in range(batch.shape[1]):
            for j in range(batch.shape[2]):
                cell = batch[i, j]
                for x, y, w, h in label_inds[i, j]:
                    for k in range(0, B*2, 5):
                        x_, y_, w_, h_, c_ = cell[k:k+5]
                        w_, h_ = torch.max(torch.tensor(0.), w_), torch.max(torch.tensor(0.), h_)
                        loss += (x-x_) ** 2 + (y-y_) ** 2 # yolo loss term 1
                        loss += (w ** 0.5 - w_ ** 0.5)**2 + (h ** 0.5 - h_ ** 0.5)**2 # yolo loss term 2
                        loss += (IOU((x, y, w, h), (x_, y_, w_, h_)) - c_) ** 2
                        # c_label_embed = torch.tensor([0.] * B)
                        # c_label_embed[c] = 1
                        # c_output_embed = torch.tensor([0.] * B)
                        # c_output_embed[c] = 1
                        # loss += c_embed[]
                        # print("labels:k ", x, y, w, h)
                        # print("output: ", x_, y_, w_, h_, c_) 
        
    return loss
    # print("ret: ", res_mat.shape)
    

In [38]:
from PIL import Image, ImageDraw

cnt = 0
for _id, sample in enumerate(loader):
    batch, labels, classes = sample
    res = yolo(batch.to(device=device))
    print(res.shape, len(labels[2]), classes[2])
    loss = yolo_loss(res, labels, classes)

    # showing the image with labels
    # res_img = Image.fromarray((batch[0] * 255).permute(1, 2, 0).byte().numpy())
    # draw = ImageDraw.Draw(res_img)
    # for pc in labels[0]:
    #     draw.rectangle((448*pc[0], 448*pc[1], 448*pc[2], 448*pc[3]), outline="red")
    # res_img.show()
    print("loss: ", loss)
    break
    

torch.Size([4, 30, 7, 7]) 1 [5]
label inds defaultdict(<class 'list'>, {(0, 0): [(0.002688172043010753, 0.028, 0.8709677419354839, 1.0)]}) 0 torch.Size([30, 7, 7])
labels:k  0.002688172043010753 0.028 0.8709677419354839 1.0
output:  tensor(0.0073, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-0.0003, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0052, device='cuda:0', grad_fn=<MaximumBackward0>) tensor(0., device='cuda:0', grad_fn=<MaximumBackward0>) tensor(0.0083, device='cuda:0', grad_fn=<UnbindBackward0>)
loss:  tensor(1.7420, device='cuda:0', grad_fn=<AddBackward0>)
label inds defaultdict(<class 'list'>, {(0, 0): [(0.0030120481927710845, 0.094, 0.5753012048192772, 0.632)], (3, 3): [(0.49096385542168675, 0.532, 0.6686746987951807, 0.72)]}) 1 torch.Size([30, 7, 7])
labels:k  0.0030120481927710845 0.094 0.5753012048192772 0.632
output:  tensor(0.0073, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(-0.0003, device='cuda:0', grad_fn=<UnbindBackward0>) tensor(0.0052, dev