In [1]:
import os
import random

from collections import OrderedDict
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
matplotlib.rcParams['figure.figsize'] = [5, 5]
matplotlib.rcParams['figure.dpi'] = 200

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
import torchvision.models as models


from data_helper import UnlabeledDataset, LabeledDataset
from helper import collate_fn, draw_box, compute_ts_road_map, compute_ats_bounding_boxes
from hrnet import get_seg_model, get_config


In [2]:
random.seed(0)
np.random.seed(0)
torch.manual_seed(0);

In [3]:
# All the images are saved in image_folder
# All the labels are saved in the annotation_csv file
image_folder = '../data'
annotation_csv = '../data/annotation.csv'

In [4]:
# You should devide the labeled_scene_index into two subsets (training and validation)
labeled_scene_index = np.arange(106, 134)

transform = torchvision.transforms.ToTensor()

# The labeled dataset can only be retrieved by sample.
# And all the returned data are tuple of tensors, since bounding boxes may have different size
# You can choose whether the loader returns the extra_info. It is optional. You don't have to use it.
labeled_trainset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=labeled_scene_index,
                                  transform=transform,
                                  extra_info=False
                                 )
trainloader = torch.utils.data.DataLoader(labeled_trainset, batch_size=2, shuffle=True, num_workers=2, collate_fn=collate_fn)

In [6]:
sample, target, road_image = iter(trainloader).next()
print(torch.stack(sample).shape)

torch.Size([2, 6, 3, 256, 306])


In [32]:
def extract_boxes(gt_bboxes):
    new_boxes = []
    for corners in gt_bboxes:
        point_squence = torch.stack([corners[:, 0], corners[:, 1], corners[:, 3], corners[:, 2], corners[:, 0]])
        x_ = point_squence.transpose(0,1)[0] * 10 + 400
        y_ = -point_squence.transpose(0,1)[1] * 10 + 400

        xmin = min(x_)
        xmax = max(x_)
        ymin = min(y_)
        ymax = max(y_)
        
        coors = [xmin, ymin, xmax, ymax]
        new_boxes.append(coors)
    return new_boxes

In [33]:
len(extract_boxes(target[0]['bounding_box']))

5

In [34]:
def generate_mask(gt_bboxes, h=800, w=800):
    
    boxes = extract_boxes(gt_bboxes)
    
    masks = np.zeros([h, w, len(boxes)], dtype='uint8') # [800, 800, number of bbox]
    
    # create masks
    for i in range(len(boxes)):
        box = boxes[i]
        row_s, row_e = int(box[1]), int(box[3])
        col_s, col_e = int(box[0]), int(box[2])
        masks[row_s:row_e, col_s:col_e, i] = 1

    return masks

In [38]:
m = generate_mask(target[0]['bounding_box'])

In [44]:
m.shape

(800, 800, 5)

In [39]:
def extract_bboxes(mask):
    """Compute bounding boxes from masks.
    mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
    Returns: bbox array [num_instances, (y1, x1, y2, x2)].
    """
    boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
    for i in range(mask.shape[-1]):
        m = mask[:, :, i]
        # Bounding box.
        horizontal_indicies = np.where(np.any(m, axis=0))[0]
        vertical_indicies = np.where(np.any(m, axis=1))[0]
        if horizontal_indicies.shape[0]:
            x1, x2 = horizontal_indicies[[0, -1]]
            y1, y2 = vertical_indicies[[0, -1]]
            # x2 and y2 should not be part of the box. Increment by 1.
            x2 += 1
            y2 += 1
        else:
            # No mask for this instance. Might happen due to
            # resizing or cropping. Set bbox to zeros
            x1, x2, y1, y2 = 0, 0, 0, 0
        boxes[i] = np.array([y1, x1, y2, x2])
    return boxes.astype(np.int32)

In [40]:
extract_bboxes(m)

array([[320, 740, 341, 790],
       [389, 164, 408, 210],
       [422, 582, 442, 629],
       [352, 474, 372, 520],
       [317, 272, 336, 320]], dtype=int32)

In [55]:
def get_coor(boxes):
    """ convert boxes to normal coordinates.
    boxes: [num_instances, (y1, x1, y2, x2)]
    
    returns: [['fl_x', 'fr_x', 'bl_x', 'br_x'], ['fl_y', 'fr_y','bl_y', 'br_y']] 
                (num_instances, 2, 4) format
    """
    coor_list = []
    for box in boxes:
        xmin = box[1]
        ymin = box[0]
        xmax = box[3]
        ymax = box[2]
        
        coor = [[xmax, xmax, xmin, xmin], [ymin, ymax, ymin, ymax]]
        coor_list.append(coor)
        
    return torch.as_tensor(coor_list)

In [56]:
b = extract_bboxes(m)
print(get_coor(b))
print(get_coor(b).shape)

tensor([[[790, 790, 740, 740],
         [320, 341, 320, 341]],

        [[210, 210, 164, 164],
         [389, 408, 389, 408]],

        [[629, 629, 582, 582],
         [422, 442, 422, 442]],

        [[520, 520, 474, 474],
         [352, 372, 352, 372]],

        [[320, 320, 272, 272],
         [317, 336, 317, 336]]], dtype=torch.int32)
torch.Size([5, 2, 4])


In [57]:
target[0]['bounding_box'].shape

torch.Size([5, 2, 4])

In [67]:
print(m.shape)
print(m.squeeze(3))

(800, 800, 5)


AxisError: axis 3 is out of bounds for array of dimension 3

In [63]:
def get_target(batch_sz, target):
    out = []
    for t in range(batch_sz):
        print(generate_mask(target[t]['bounding_box']).shape)
        out.append( torch.as_tensor(generate_mask(target[t]['bounding_box'])) )
    return torch.stack(out)

get_target(2, target)

(800, 800, 5)
(800, 800, 20)


RuntimeError: stack expects each tensor to be equal size, but got [800, 800, 5] at entry 0 and [800, 800, 20] at entry 1

In [71]:
gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2,
                         name="trim_gt_masks")

1050