In [2]:
import os
import random

import cv2
import numpy as np

import torch
from torch.utils.data import DataLoader
from torchvision import models

from resnet_yolo import resnet50
from dataset import VocDetectorDataset
from eval_voc import evaluate
from predict import predict_image
from config import VOC_CLASSES, COLORS
from kaggle_submission import output_submission_csv
import matplotlib.pyplot as plt
from tqdm import tqdm

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
# Scratch check of the "no object" confidence loss on synthetic tensors.
S = 14          # grid cells per side
N = 24          # batch size
B = 2           # boxes per grid cell
Classes = 20
bounding = 5    # values stored per box

pred_tensor = torch.randn(N, S, S, B * bounding + Classes)
target_tensor = torch.zeros(N, S, S, B * bounding + Classes)

# A cell "contains an object" when either of the two target values at
# indices 4 and 9 of the last dimension is positive.
contains_object_mask = (target_tensor[..., 4] > 0) | (target_tensor[..., 9] > 0)
no_object_mask = (target_tensor[..., 4] == 0) & (target_tensor[..., 9] == 0)

# Boolean-mask indexing already yields one row of 30 values per selected cell.
no_object_prediction = pred_tensor[no_object_mask].reshape(-1, 30)
no_object_target = target_tensor[no_object_mask].reshape(-1, 30)

# Keep only the two confidence columns (indices 4 and 9).
indices = torch.tensor([4, 9])
no_object_prediction_confidences = no_object_prediction[:, indices]
no_object_target_confidences = no_object_target[:, indices]

# Select rows where at least one predicted confidence is positive, then
# widen the row mask so that both confidence columns of those rows are kept.
no_object_prediction_mask = (no_object_prediction_confidences > 0).any(dim=1)
no_object_prediction_mask = no_object_prediction_mask[:, None].expand_as(
    no_object_prediction_confidences
)

# Apply the same mask to predictions and targets so they stay aligned.
positive_prediction_confidences_on_pred = no_object_prediction_confidences[no_object_prediction_mask]
positive_prediction_confidences_on_target = no_object_target_confidences[no_object_prediction_mask]
positive_prediction_confidences = positive_prediction_confidences_on_pred

# Sum of squared differences over the selected confidences.
confidence_error = positive_prediction_confidences_on_pred - positive_prediction_confidences_on_target
no_object_loss = confidence_error.pow(2).sum()

print(no_object_loss)
print(positive_prediction_confidences[:, None].shape)

tensor(6961.5771)
torch.Size([6974, 1])


In [4]:
def compute_iou(box1, box2):
    '''Pairwise intersection-over-union between two sets of boxes.

    Every box is given by its corners as [x1,y1,x2,y2].

    Args:
      box1: (tensor) bounding boxes, sized [N,4].
      box2: (tensor) bounding boxes, sized [M,4].
    Return:
      (tensor) iou, sized [N,M]; entry (i, j) pairs box1[i] with box2[j].
    '''
    def _area(boxes):
        # width * height of each box -> [K,]
        return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    pair_shape = (box1.size(0), box2.size(0), 2)
    rows = box1.unsqueeze(1)  # [N,4] -> [N,1,4]
    cols = box2.unsqueeze(0)  # [M,4] -> [1,M,4]

    # Corners of the overlap rectangle for every (box1, box2) pair -> [N,M,2]
    top_left = torch.max(rows[..., :2].expand(pair_shape), cols[..., :2].expand(pair_shape))
    bottom_right = torch.min(rows[..., 2:].expand(pair_shape), cols[..., 2:].expand(pair_shape))

    # Negative extents mean the boxes do not overlap along that axis.
    overlap = (bottom_right - top_left).clamp(min=0)  # [N,M,2]
    inter = overlap[..., 0] * overlap[..., 1]  # [N,M]

    area_rows = _area(box1).unsqueeze(1).expand_as(inter)  # [N,] -> [N,M]
    area_cols = _area(box2).unsqueeze(0).expand_as(inter)  # [M,] -> [N,M]

    return inter / (area_rows + area_cols - inter)

In [81]:
# Scratch check of the "contains object" confidence loss on synthetic tensors.
S = 14          # grid cells per side
N = 24          # batch size
B = 2           # boxes per grid cell
Classes = 20
bounding = 5    # values stored per box

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell also runs on machines without CUDA. Every tensor used for indexing
# below lives on this one device; mixing CPU tensors with CUDA index tensors
# is not portable across PyTorch versions.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

mseloss = torch.nn.MSELoss(reduction='sum')

# Size of the last dimension: B boxes of `bounding` values plus class scores.
last_dim_size = B * bounding + Classes

pred_tensor = torch.randn(N, S, S, last_dim_size)
target_tensor = torch.randn(N, S, S, last_dim_size)


def _cell_boxes_to_corners(boxes, grid_size):
    """Turn rows of 5 box values into [x1, y1, x2, y2] corner boxes.

    Columns 0/1 are divided by `grid_size` and used as the box centre,
    columns 2/3 are used as full width/height; column 4 is dropped.

    Args:
      boxes: (tensor) sized [K,5].
      grid_size: divisor applied to the centre columns.
    Return:
      (tensor) sized [K,4], on the same device as `boxes`.
    """
    centers = boxes[:, :2] / grid_size
    half_extent = 0.5 * boxes[:, 2:4]
    return torch.cat((centers - half_extent, centers + half_extent), dim=1)


# Cells whose target has a positive value at index 4 or 9 are treated as
# containing an object; keep prediction and target rows for those cells only.
contains_object_mask = (target_tensor[:,:,:,4] > 0) | (target_tensor[:,:,:,9] > 0)
contains_object_pred = pred_tensor[contains_object_mask].view(-1, last_dim_size)
contains_object_target = target_tensor[contains_object_mask].view(-1, last_dim_size)

no_object_mask = (target_tensor[:,:,:,4] == 0) & (target_tensor[:,:,:,9] == 0)
no_object_prediction = pred_tensor[no_object_mask].view(-1, last_dim_size)
no_object_target = target_tensor[no_object_mask].view(-1, last_dim_size)

# The first 5*B values of each row are the boxes, the remainder the classes.
bounding_box_pred = contains_object_pred[:, :5*B]
classes_pred = contains_object_pred[:, 5*B:]
bounding_box_target = contains_object_target[:, :5*B]
classes_target = contains_object_target[:, 5*B:]

# One row per box: B consecutive rows belong to the same grid cell.
bounding_box_target = bounding_box_target.contiguous().to(device).view(-1, 5)
bounding_box_pred = bounding_box_pred.contiguous().to(device).view(-1, 5)
box_target = bounding_box_target
box_pred = bounding_box_pred

box_target_processed = _cell_boxes_to_corners(box_target, S)
box_pred_processed = _cell_boxes_to_corners(box_pred, S)

contains_object_response_mask = torch.zeros(box_target.size(), dtype=torch.bool, device=device)
box_target_iou = torch.zeros(box_target.size(), device=device)

for i in range(0, box_target_processed.size(0), B):
    # IoU of the cell's B predicted boxes against the cell's first target box -> [B,1]
    iou = compute_iou(box_pred_processed[i:i+B, :], box_target_processed[i, :].unsqueeze(0))
    max_val, max_index = iou.max(0)
    responsible_row = i + max_index
    # Mark all 5 values of the predicted box with the highest IoU.
    contains_object_response_mask[responsible_row] = True
    # NOTE(review): the confidence target is a fixed 10 rather than the
    # computed IoU (`max_val`) - confirm this placeholder is intended.
    box_target_iou[responsible_row, 4] = 10

box_prediction_response = bounding_box_pred[contains_object_response_mask].view(-1, 5)
box_target_response = bounding_box_target[contains_object_response_mask].view(-1, 5)
box_target_response_iou = box_target_iou[contains_object_response_mask].view(-1, 5)

# The hand-written sum of squares and MSELoss(reduction='sum') should agree.
contain_loss = torch.sum(torch.pow(box_prediction_response[:,4] - box_target_response_iou[:,4], 2))
print(contain_loss)
print(mseloss(box_prediction_response[:,4], box_target_response_iou[:,4]))

tensor(357646.7500, device='cuda:0')
tensor(357646.7500, device='cuda:0')


In [32]:
# Sum-of-squared-errors class loss on synthetic tensors.
# NOTE(review): the leading dims here are (14, 24, 24) while the other cells
# build tensors as (N=24, S=14, S=14) - confirm which ordering is intended.
classes_pred = torch.randn(14, 24, 24, 20)
classes_target = torch.randn(14, 24, 24, 20)

# Despite the name, this is a single scalar summed over every element.
per_cell_loss = (classes_pred - classes_target).pow(2).sum()

print(per_cell_loss)

tensor(321088.4688)


In [33]:
#print(target)
S = 14
N = 24
B = 2
Classes = 20
bounding = 5

# pred_tensor = torch.zeros(N, S, S, B*bounding + Classes)
# target_tensor = torch.zeros(N, S, S, B*bounding + Classes)

pred_tensor = torch.randn(N, S, S, B*bounding + Classes)
target_tensor = torch.zeros(N, S, S, B*bounding + Classes)
pred = pred_tensor
target = target_tensor

pred_cls, pred_response, pred_bboxes = pred
label_cls, label_response, label_bboxes = target


pred_cls = pred_cls
pred_response =  pred_response
pred_bboxes = pred_bboxes

label_cls = label_cls
label_response =  label_response
label_bboxes = label_bboxes



batch_size = pred_response.size(0)

no_obj_mask = (label_response[:, :, :, 0] < 1).unsqueeze(-1).expand_as(label_response)

obj_response_mask = (label_response[:, :, :, 0] > 0).unsqueeze(-1).expand_as(label_response)

obj_box_mask = (label_response[:, :, :, 0] > 0).unsqueeze(-1).expand_as(label_bboxes)

obj_cls_mask = (label_response[:, :, :, 0] > 0).unsqueeze(-1).expand_as(label_cls)

no_obj_contain_pred = pred_response[no_obj_mask].view(-1)
no_obj_contain_target = label_response[no_obj_mask].view(-1)


obj_contain_pred = pred_response[obj_response_mask].view(-1, B)

obj_contain_target = label_response[obj_response_mask].view(-1, B)

# class pred response
obj_class_pred = pred_cls[obj_cls_mask].view(-1, 20)
obj_class_target = label_cls[obj_cls_mask].view(-1, 20)

# box pred response
obj_loc_pred = pred_bboxes[obj_box_mask].view(-1, B * 4)
obj_loc_target = label_bboxes[obj_box_mask].view(-1, B * 4)

iou = torch.zeros(obj_loc_pred.size(0), B)
iou = Variable(iou)

for j in range(B):
    pred_bb = torch.zeros(obj_loc_pred.size(0), 4)
    pred_bb = Variable(pred_bb)

    target_bb = torch.zeros(obj_loc_pred.size(0), 4)
    target_bb = Variable(target_bb)

    target_bb[:, 0] = obj_loc_target[:, j * 4] - 0.5 * pow(obj_loc_target[:, j * 4 + 2], 2)
    target_bb[:, 1] = obj_loc_target[:, j * 4 + 1] - 0.5 * pow(obj_loc_target[:, j * 4 + 3], 2)
    target_bb[:, 2] = obj_loc_target[:, j * 4] + 0.5 * pow(obj_loc_target[:, j * 4 + 2], 2)
    target_bb[:, 3] = obj_loc_target[:, j * 4 + 1] + 0.5 * pow(obj_loc_target[:, j * 4 + 3], 2)

    pred_bb[:, 0] = obj_loc_pred[:, j * 4] - 0.5 * pow(obj_loc_pred[:, j * 4 + 2], 2)
    pred_bb[:, 1] = obj_loc_pred[:, j * 4 + 1] - 0.5 * pow(obj_loc_pred[:, j * 4 + 3], 2)
    pred_bb[:, 2] = obj_loc_pred[:, j * 4] + 0.5 * pow(obj_loc_pred[:, j * 4 + 2], 2)
    pred_bb[:, 3] = obj_loc_pred[:, j * 4 + 1] + 0.5 * pow(obj_loc_pred[:, j * 4 + 3], 2)

    iou[:, j] = self.compute_iou(target_bb, pred_bb)

max_iou, max_index = iou.max(1)
min_iou, _ = iou.min(1)
max_index = max_index.data.cpu()

coo_response_mask = torch.ByteTensor(obj_loc_pred.size(0), B * 4)

coo_response_mask.zero_()
for i in range(obj_loc_pred.size(0)):
    coo_response_mask[i, max_index[i] * 4:max_index[i] * 4 + 4] = 1

obj_axis_pred = obj_loc_pred[coo_response_mask].view(-1, 4)
obj_axis_target = obj_loc_target[coo_response_mask].view(-1, 4)

iou_response_mask = coo_response_mask[:, [i * 4 for i in range(B)]]

obj_response_pred = obj_contain_pred[iou_response_mask].view(-1)
obj_response_target = obj_contain_target[iou_response_mask].view(-1)

obj_local_loss = F.mse_loss(obj_axis_pred[:, 0:2], obj_axis_target[:, 0:2], size_average=False) + \
                 F.mse_loss(obj_axis_pred[:, 2:4], obj_axis_target[:, 2:4], size_average=False)
obj_class_loss = F.mse_loss(obj_class_pred, obj_class_target, size_average=False)


max_iou = (max_iou.data)
conf_id = ((1 - max_iou) * self.l_noobj + max_iou)

conf_id = Variable(conf_id, requires_grad=True)

obj_contain_loss = F.mse_loss(obj_response_pred, max_iou, size_average=False)

no_obj_contain_loss = F.mse_loss(no_obj_contain_pred, no_obj_contain_target, size_average=False)

iou_loss = F.mse_loss(max_iou, obj_response_target, size_average=False)

loss_all = (self.l_coord * obj_local_loss + obj_class_loss + obj_contain_loss + self.l_noobj * no_obj_contain_loss + iou_loss) / batch_size

loss_info = {
    'local_loss': self.l_coord * obj_local_loss.data,
    'class_loss': obj_class_loss.data,
    'contain_loss': obj_contain_loss.data,
    'no_contain_loss': self.l_noobj * no_obj_contain_loss,
    'iou_loss': iou_loss,
    'mean_iou': torch.mean(max_iou)
}

return loss_all, loss_info


IndentationError: unexpected indent (<ipython-input-33-2208b855d186>, line 16)