In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torchvision.transforms as transforms
from torchvision.utils import draw_bounding_boxes
import torch.optim as optim

import numpy as np
import matplotlib.pyplot as plt

from model import YOLOv1
from loss import YoloLoss
from bdd100k import BDD100k
from utils import xywh_to_xyxy, xyxy_to_xywh, intersection_over_union, non_max_supression, precision, recall

transform = transforms.Compose([transforms.Resize((448,448))])
val_dataset = BDD100k(root='/Users/calixkim/VIP27920/bdd100k/', train=False, transform=transform)
val_loader = data.DataLoader(dataset=val_dataset, 
                            batch_size=8,
                            num_workers=2,
                            shuffle=False)

model_ = YOLOv1(split_grids=7, num_bboxes=2, num_classes=13)
model_.load_state_dict(torch.load('/Users/calixkim/VIP27920/Lane_Detection_F22/yolov1//weights/yolov1_150.pt', map_location=torch.device('cpu')))
model_.eval();

def unnorm_bbox(bbox, width, height, i, j):
    '''
    Unormalize the bounding box predictions based on the yolo predictions
    '''
    bbox[:,0] = width / (7 / (bbox[:,0] + j))
    bbox[:,1] = height / (7 / (bbox[:,1] + i))
    bbox[:,2] = width / (7 / bbox[:,2])
    bbox[:,3] = height / (7 / bbox[:,3])

    return bbox

def draw_data_loader(imgs, labels):
    '''
    Draw the predicted bounding boxes from the network on a single image
    Args:
        imgs (tensor): images of the predicted bboxes
        labels (tensor): ground truth bboxes
    Returns:
        ground_truth (numpy array): ground truth bboxes with object in it. [batch #, x, y, w, h]
    '''
    b, _, height, width = imgs.shape
    imgs = imgs.to(torch.uint8)
    
    S = labels.shape[1]
    conf = labels[..., 13]
    I = torch.nonzero(conf).numpy()
    
    batch, i, j = I[:,0], I[:,1], I[:,2] 
    bbox = labels[batch,i,j,14:18]
    bbox[...,0] = width / (S / (bbox[...,0] + j))
    bbox[...,1] = height / (S / (bbox[...,1] + i))
    bbox[...,2] = width / (S / bbox[...,2])
    bbox[...,3] = height / (S / bbox[...,3])
    bbox = xywh_to_xyxy(bbox, width, height)
    batch = np.expand_dims(batch, axis=0)

    ground_truth = np.concatenate((batch.T,bbox),axis=1)
    
    return ground_truth

def draw_batch(img, pred, out_dir='./', display=False):
    '''
    Draw the predicted bounding boxes from the network on a single image
    Args:
        img (tensor): image of the predicted bboxes
        pred (tensor): predicted bboxes
        out_dir (str): output directory to store image
        display (bool): display the image or not
    Returns:
    '''
    class_dict = {
        'pedestrian' : 1,
        'rider' : 2,
        'car' : 3,
        'truck' : 4, 
        'bus' : 5, 
        'train' : 6, 
        'motorcycle' : 7,
        'bicycle' : 8,
        'traffic light' : 9,
        'traffic sign' : 10,
        'other vehicle': 11,
        'trailer': 12,
        'other person': 13,
    }
    num_to_class = {i:s for s,i in class_dict.items()}

    s = pred.shape[-1]
    img = torch.tensor(img) if not isinstance(img, torch.Tensor) else img
    assert type(pred) == torch.Tensor
    img = img.to(torch.uint8)
    batch, _, height, width = img.shape
    nms = []
    _, pclass = torch.max(pred[:, :, :13], dim=2)
    for i in range(s):
        for j in range(s):
            conf = pred[i, j, 13]
            if conf > 0.5:
                class_pred = num_to_class[pclass[i, j].item()]
                label = [class_pred]
                bbox1 = pred[i, j, 14:18].unsqueeze(0)
                bbox1 = unnorm_bbox(bbox1, width, height, i ,j)
                bbox1 = xywh_to_xyxy(bbox1, width, height)
                bbox2 = pred[i, j, 19:23].unsqueeze(0)
                bbox2 = unnorm_bbox(bbox2, width, height, i ,j)
                bbox2 = xywh_to_xyxy(bbox2, width, height)
                img = draw_bounding_boxes(img, bbox1, width=3, labels=label, colors=(0, 255, 0)) 
                img = draw_bounding_boxes(img, bbox2, width=3, labels=label, colors=(0, 255, 0))
                class_conf = torch.stack((torch.tensor((class_dict[class_pred])), conf))
                nms.append(torch.cat((class_conf, pred[i, j, 14:18])))
                nms.append(torch.cat((class_conf, pred[i, j, 19:23])))

    # nms = sorted(nms, key=lambda x: x[1])
    nms = non_max_supression(nms, 0.5, 0.5)
    img = np.transpose(img.cpu().numpy(), (1,2,0)).astype(np.uint8)
    
    return img, nms
    

def draw_nms(img, nms, out_dir='./', display=False):
    '''
    Draw the predicted bounding boxes from the network on a single image
    Args:
        img (tensor): image of the predicted bboxes
        pred (tensor): predicted bboxes
        out_dir (str): output directory to store image
        display (bool): display the image or not
    Returns:
    '''
    class_dict = {
        'pedestrian' : 1,
        'rider' : 2,
        'car' : 3,
        'truck' : 4, 
        'bus' : 5, 
        'train' : 6, 
        'motorcycle' : 7,
        'bicycle' : 8,
        'traffic light' : 9,
        'traffic sign' : 10,
        'other vehicle': 11,
        'trailer': 12,
        'other person': 13,
    }
    num_to_class = {i:s for s,i in class_dict.items()}

    img = torch.tensor(img) if not isinstance(img, torch.Tensor) else img
    img = img.to(torch.uint8)
    batch, _, height, width = img.shape

    for x in nms:
        bbox = xywh_to_xyxy(x[2:].unsqueeze(0), width, height)
        label = str(round((x[1].item()),2))
        img = draw_bounding_boxes(img, bbox, width=3, labels=[label], colors=(0,255,0))

    img = np.transpose(img.cpu().numpy(), (1,2,0)).astype(np.uint8)
    
    return img

# Recall
def accuracy(label, nms):
    '''
    Args:
        label (tensor): ground truth bboxes
        nms (list): list of tensors after non max suppression. [class, conf, x, y, w, h]
    Returns:
        tp (int): True Positive
        fp (int): False Positive
        fn (int): False Negative
        tn (int): True Negative
    '''

    tp, fp, fn, tn = (0, 0, 0, 0)
    nms = torch.stack((nms))

    ground_truth_all = []   # [0|1, x, y, w, h] * 49
    s = label.shape[1]
    for i in range(s):
        for j in range(s):
            ground_truth_all.append(label[i, j, 13:18])

    for pred in nms:
        iou_max = 0
        for i, v in enumerate(ground_truth_all):
            iou = intersection_over_union(v[1:].unsqueeze(0), pred[2:].unsqueeze(0))
            if iou > iou_max:
                iou_max = iou
                gt_max_idx = i

        if iou_max > 0.1:   # detected
            print(ground_truth_all[gt_max_idx])
            if ground_truth_all[gt_max_idx][0] == 0:
                fp += 1
            else:
                tp += 1
        else:
            fn += 1
            
    tn = 49 - tp - fp - fn
    
    return tp, fp, fn, tn

In [2]:
#change, copy paste to actual function later

def draw_data_loader(imgs, labels):
    '''
    Draw the predicted bounding boxes from the network on a single image
    Args:
        imgs (tensor): images of the predicted bboxes
        labels (tensor): ground truth bboxes
    Returns:
        ground_truth (numpy array): ground truth bboxes with object in it. [batch #, x, y, w, h]
    '''
    b, _, height, width = imgs.shape
    imgs = imgs.to(torch.uint8)
    
    S = labels.shape[1]
    conf = labels[..., 13]
    I = torch.nonzero(conf).numpy()
    
    batch, i, j = I[:,0], I[:,1], I[:,2] 
    bbox = labels[batch,i,j,14:18]
    bbox[...,0] = width / (S / (bbox[...,0] + j))
    bbox[...,1] = height / (S / (bbox[...,1] + i))
    bbox[...,2] = width / (S / bbox[...,2])
    bbox[...,3] = height / (S / bbox[...,3])
    bbox = xywh_to_xyxy(bbox, width, height)
    batch = np.expand_dims(batch, axis=0)

    ground_truth = np.concatenate((batch.T,bbox),axis=1)
    
    return ground_truth

In [3]:
TP, FP, FN, TN = 0, 0, 0, 0
rec = []
prec = []




In [145]:
# test code
test = torch.rand(8,7,7,18)
img_ = torch.rand(8, 3, 448, 448)
test[1,1,1,13] = 1
test[2,2,2,13] = 1
test[3,3,3,13] = 1
test = test.to(torch.uint8)
batch, _, height, width = img_.shape
# print(test[..., 13:18].shape) # 8 images, [conf, x, y, w, h] 
conf = test[..., 13]
I = torch.nonzero(conf).numpy()
batch = I[:,0]
i = I[:,1]
j = I[:,2]
# print(i,j)
bbox = test[:,i,j,14:18]
print(bbox.shape) # -> torch.Size([N, 4]) N: number of nonzero conf in ground truth
bbox = torch.tensor([[[0.1,0.2,0.3,0.4], [0.5,0.6,0.7,0.8], [0.1, 0.2, 0.1, 0.2]],
                    [[0.1,0.2,0.3,0.4], [0.5,0.6,0.7,0.8], [0.1, 0.2, 0.1, 0.2]],
                    [[0.1,0.6,0.3,0.4], [0.5,0.6,0.7,0.8], [0.1, 0.2, 0.1, 0.2]],
                    [[0.1,0.2,0.3,0.4], [0.5,0.6,0.7,0.8], [0.1, 0.2, 0.1, 0.2]],
                    [[0.1,0.2,0.3,0.4], [0.5,0.9,0.7,0.8], [0.1, 0.2, 0.1, 0.2]],
                    [[0.1,0.2,0.3,0.4], [0.5,0.6,0.3,0.8], [0.1, 0.2, 0.1, 0.2]],
                    [[0.1,0.2,0.3,0.4], [0.5,0.6,0.7,0.8], [0.1, 0.2, 0.1, 0.2]],
                    [[0.1,0.2,0.3,0.4], [0.5,0.6,0.7,0.8], [0.1, 0.2, 0.1, 0.2]]])
print(bbox[..., 2])
bbox[...,0] = width / (7 / (bbox[...,0] + j))
bbox[...,1] = height / (7 / (bbox[...,1] + i))
bbox[...,2] = width / (7 / bbox[...,2])
bbox[...,3] = height / (7 / bbox[...,3])
print(bbox)
bbox = xywh_to_xyxy(bbox, width, height)
print(bbox)
    #     img = draw_bounding_boxes(img, bbox, width=3, colors=(0,255,0))

    # img = np.transpose(img.cpu().numpy(), (1,2,0)).astype(np.uint8)


I: [[1 1 1]
 [2 2 2]
 [3 3 3]]
i  [1 2 3]
[1 2 3] [1 2 3]
torch.Size([8, 3, 4])
tensor([[0.3000, 0.7000, 0.1000],
        [0.3000, 0.7000, 0.1000],
        [0.3000, 0.7000, 0.1000],
        [0.3000, 0.7000, 0.1000],
        [0.3000, 0.7000, 0.1000],
        [0.3000, 0.3000, 0.1000],
        [0.3000, 0.7000, 0.1000],
        [0.3000, 0.7000, 0.1000]])
tensor([[[ 70.4000,  76.8000,  19.2000,  25.6000],
         [160.0000, 166.4000,  44.8000,  51.2000],
         [198.4000, 204.8000,   6.4000,  12.8000]],

        [[ 70.4000,  76.8000,  19.2000,  25.6000],
         [160.0000, 166.4000,  44.8000,  51.2000],
         [198.4000, 204.8000,   6.4000,  12.8000]],

        [[ 70.4000, 102.4000,  19.2000,  25.6000],
         [160.0000, 166.4000,  44.8000,  51.2000],
         [198.4000, 204.8000,   6.4000,  12.8000]],

        [[ 70.4000,  76.8000,  19.2000,  25.6000],
         [160.0000, 166.4000,  44.8000,  51.2000],
         [198.4000, 204.8000,   6.4000,  12.8000]],

        [[ 70.4000,  76.800

IndexError: index 3 is out of bounds for dimension 1 with size 3

In [200]:
a = np.ones((1,62))
c = np.arange(0,62)
print(a)
c = np.expand_dims(c, axis=0)
print(a.shape, c.shape)
print(c.T)
b = np.ones((62,4))
np.concatenate((c.T,b),axis=1)



[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
(1, 62) (1, 62)
[[ 0]
 [ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]
 [25]
 [26]
 [27]
 [28]
 [29]
 [30]
 [31]
 [32]
 [33]
 [34]
 [35]
 [36]
 [37]
 [38]
 [39]
 [40]
 [41]
 [42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [55]
 [56]
 [57]
 [58]
 [59]
 [60]
 [61]]


array([[ 0.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 2.,  1.,  1.,  1.,  1.],
       [ 3.,  1.,  1.,  1.,  1.],
       [ 4.,  1.,  1.,  1.,  1.],
       [ 5.,  1.,  1.,  1.,  1.],
       [ 6.,  1.,  1.,  1.,  1.],
       [ 7.,  1.,  1.,  1.,  1.],
       [ 8.,  1.,  1.,  1.,  1.],
       [ 9.,  1.,  1.,  1.,  1.],
       [10.,  1.,  1.,  1.,  1.],
       [11.,  1.,  1.,  1.,  1.],
       [12.,  1.,  1.,  1.,  1.],
       [13.,  1.,  1.,  1.,  1.],
       [14.,  1.,  1.,  1.,  1.],
       [15.,  1.,  1.,  1.,  1.],
       [16.,  1.,  1.,  1.,  1.],
       [17.,  1.,  1.,  1.,  1.],
       [18.,  1.,  1.,  1.,  1.],
       [19.,  1.,  1.,  1.,  1.],
       [20.,  1.,  1.,  1.,  1.],
       [21.,  1.,  1.,  1.,  1.],
       [22.,  1.,  1.,  1.,  1.],
       [23.,  1.,  1.,  1.,  1.],
       [24.,  1.,  1.,  1.,  1.],
       [25.,  1.,  1.,  1.,  1.],
       [26.,  1.,  1.,  1.,  1.],
       [27.,  1.,  1.,  1.,  1.],
       [28.,  1.,  1.,  1.,  1.],
       [29.,  