In [10]:
import numpy as np
import torch.nn as nn
import torchvision.transforms as transforms
import torch
from torch.utils.data import DataLoader
import pandas as pd
from utils import bbox_iou
from dataset import DetectionDataset, Normalise, Pad, ToTensor

## Labels

In [11]:
# Statistics handed to the Normalise transform (three values each).
mean = [92.11938007161459, 102.83839236762152, 104.90335580512152]
std = [66.09941202519124, 70.6808655565459, 75.05305001603533]

# Three groups of three two-element anchor entries, passed straight to the dataset.
anchor_sets = np.array([
    [[116, 90], [156, 198], [373, 326]],
    [[30, 61], [62, 45], [59, 119]],
    [[10, 13], [16, 30], [33, 23]],
])

# Per-sample transform chain: Normalise -> Pad(416) -> ToTensor, in that order.
train_transform = transforms.Compose([
    Normalise(mean=mean, std=std),
    Pad(416),
    ToTensor(),
])

# Custom detection dataset wired up with the anchors and transforms above.
transformed_train_data = DetectionDataset(
    label_dict="det_train_shortened.json",
    root_dir='images/',
    classes_file="data/bdd100k.names",
    grid_sizes=[13, 26, 52],
    anchors=anchor_sets,
    transform=train_transform,
)

# Shuffled loader yielding one sample per batch, loaded in the main process.
train_loader = DataLoader(transformed_train_data, batch_size=1, shuffle=True, num_workers=0)

In [13]:

# Pull one batch from the loader instead of iterating the whole dataset only to
# keep the final batch. The loader shuffles, so either way this is a random sample.
data = next(iter(train_loader))
# Each batch is unpacked as a two-value mapping: the image tensor and its label tensor.
image, labels = data.values()
labels.shape

torch.Size([1, 10647, 17])

## Predictions

In [47]:
# Load the saved YOLO-layer output, then keep only the first 17 entries of the
# last axis so the width matches the label tensor inspected above.
yolo_layer_output = torch.load("ex_tensors/yolo_layer_output_size10647.pt")
pretend_preds = yolo_layer_output[:, :, :17]

In [60]:
# Rows whose objectness score (channel 4) is above 0.9; display the first one.
is_confident = pretend_preds[:, :, 4] > 0.9
pretend_preds[is_confident][0]

tensor([3.1557e+02, 1.2112e+02, 1.1026e+02, 4.8690e+01, 9.8914e-01, 2.1258e-04,
        1.3264e-05, 2.4101e-01, 1.8465e-05, 1.6691e-05, 2.1706e-03, 4.3476e-05,
        8.3802e-01, 1.8591e-04, 6.2785e-06, 5.8800e-05, 1.1566e-05])

In [62]:
# Row 93 of the first (only) sample in the batch.
labels[0, 93]

tensor([ 69.4927, 191.9236,   4.7743,   7.1615,   1.0000,   0.0000,   0.0000,
          0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   0.0000,   1.0000,
          0.0000,   0.0000,   0.0000])

In [None]:
class Yolo_Loss(nn.Module):
    """
    YOLO-style detection loss over flattened per-cell predictions.

    The total is a weighted sum of four terms: box regression, objectness at
    cells that contain an object, objectness at cells that do not, and
    classification. Both inputs share the channel layout
    [x, y, w, h, objectness, class_0, ..., class_{C-1}] along the last axis.

    NOTE(review): BCEWithLogitsLoss and CrossEntropyLoss expect raw scores
    (logits). If the model output in channels 4+ has already been squashed to
    probabilities, swap in probability-based losses — confirm against the
    model head.
    """

    def __init__(self):
        super().__init__()
        # losses and functions
        self.bcwell = nn.BCEWithLogitsLoss()
        self.mse = nn.MSELoss()
        self.cross_entropy = nn.CrossEntropyLoss()
        self.sigmoid = nn.Sigmoid()

        # lambda constants (relative weight of each loss term)
        self.lambda_class = 1
        self.lambda_noobj = 10
        self.lambda_box = 10
        self.lambda_obj = 1

    def forward(self, prediction, label):
        """
        Computes difference between prediction and label.

        Input:
        =prediction=    Tensor of all prediction arrays of size (n_batches, 10647, 5+n_classes).
        =label=         Tensor of all label arrays of size (n_batches, 10647, 5+n_classes).

        Output:
        =loss=          Total loss computed for this batch (scalar tensor).
        """
        # Targets are compared against predictions, so keep them in the same dtype.
        label = label.to(prediction.dtype)

        # check objectness for identity function - channel 4 in labels and predictions
        obj = label[..., 4] == 1    # I^obj_ij
        noobj = label[..., 4] == 0  # I^noobj_ij

        # Rows belonging to cells that contain an object: shape (n_obj, 5+n_classes).
        pred_obj = prediction[obj]
        label_obj = label[obj]

        # Used for any term whose mask is empty; a mean over zero rows would be NaN.
        zero = prediction.new_zeros(())

        ## box loss - only cells that actually hold an object contribute
        bbox_centre_loss = torch.sum(torch.square(pred_obj[:, 0:2] - label_obj[:, 0:2]))

        # Clamp + epsilon keeps sqrt (and its gradient) finite for non-positive sizes.
        eps = 1e-16
        pred_wh = torch.sqrt(pred_obj[:, 2:4].clamp(min=0) + eps)
        label_wh = torch.sqrt(label_obj[:, 2:4].clamp(min=0) + eps)
        bbox_dims_loss = torch.sum(torch.square(pred_wh - label_wh))

        # lambda_box is applied once, in the final weighted sum below.
        bbox_loss = bbox_centre_loss + bbox_dims_loss

        if obj.any():
            ## object loss - objectness score at object cells
            obj_loss = self.bcwell(pred_obj[:, 4], label_obj[:, 4])

            ## class loss - CrossEntropyLoss wants class indices, not one-hot rows
            class_loss = self.cross_entropy(pred_obj[:, 5:], label_obj[:, 5:].argmax(dim=-1))
        else:
            obj_loss = zero
            class_loss = zero

        ## no object loss - objectness score at empty cells
        if noobj.any():
            noobj_loss = self.bcwell(prediction[..., 4][noobj], label[..., 4][noobj])
        else:
            noobj_loss = zero

        loss = (
            self.lambda_box * bbox_loss
            + self.lambda_obj * obj_loss
            + self.lambda_noobj * noobj_loss
            + self.lambda_class * class_loss
        )

        return loss

In [None]:
def forward(self, prediction, label, anchors):
    """
    Draft anchor-aware loss: computes difference between prediction and label.

    Written as a method (takes `self`) but defined at cell level; it reads
    `self.sigmoid`, `self.mse`, `self.bcwell`, `self.cross_entropy` and the
    four `self.lambda_*` weights.

    NOTE(review): every `[None]` index below only prepends an axis to the tensor
    and looks like a placeholder for a channel slice that was never filled in —
    confirm the intended slices before using this function.

    Input:
    =prediction=    Tensor of all prediction arrays of size (n_batches, 10647, 5+n_classes).
                    Overwritten in place by this function.
    =label=         Tensor of all label arrays of size (n_batches, 10647, 5+n_classes).
                    Overwritten in place by this function.
    =anchors=       Anchor sizes; divided into `label` and later reshaped to
                    (1, 3, 1, 1, 2), so it must hold exactly six values.

    Output:
    =loss=          Total loss computed for this batch.
    """
    # check objectness for identity function - channel 4 in labels and predictions
    obj = (label[:,:,4] == 1) # I^obj_ij
    noobj = (label[:,:,4] == 0) # I^noobj_ij

    ## box coordinate loss
    # The assignments below write into the caller's tensors, and because `[None]`
    # selects everything they cover all channels, not just the ones the trailing
    # comments mention.
    # NOTE(review): the masks have the shape of label[:,:,4], while `[None]` adds a
    # leading axis, so the masks no longer line up with the leading dims — confirm.
    prediction[None] = self.sigmoid(prediction[None]) # currently x,y coords
    label[None] = torch.log((1e-16 + label[None]/anchors)) # width and height coords
    bbox_coord_loss = self.mse(prediction[None][obj], label[None][obj])

    ## object loss
    # anchors is reshaped here, after it has already been used in the division above.
    # NOTE(review): a (1,3,1,1,2) layout suggests a 5-D prediction tensor, which
    # differs from the 3-D shape given in the docstring — confirm.
    anchors = anchors.reshape(1,3,1,1,2)
    box_preds = torch.cat([self.sigmoid(prediction[None]), torch.exp(prediction[None])*anchors], dim=-1)
    # bbox_iou comes from the project's utils module; its result is detached so no
    # gradient flows through it.
    result = bbox_iou(box_preds[obj], label[None][obj]).detach()
    obj_loss = self.mse(self.sigmoid(prediction[None][obj]), result*label[None][obj])

    ## no object loss
    noobj_loss = self.bcwell((prediction[None][noobj]), (label[None][noobj]))

    ## class loss
    # The label rows are cast to integers and passed directly as the target.
    # NOTE(review): CrossEntropyLoss with an integer target expects class indices —
    # confirm the label encoding at this point.
    class_loss = self.cross_entropy((prediction[None][obj]), (label[None][obj].long()))

    # Weighted sum of the four terms.
    loss = self.lambda_box*bbox_coord_loss + self.lambda_obj*obj_loss + self.lambda_noobj*noobj_loss + self.lambda_class*class_loss

    return loss