In [8]:
# add repo path to the system path
from pathlib import Path
import os, sys
# Locate the repository root: the closest directory, starting at the current
# working directory and walking upwards, that contains a '.gitignore' entry.
_start_dir = Path.cwd().resolve()
for repo_path in (_start_dir, *_start_dir.parents):
    if (repo_path / '.gitignore').exists():
        break
else:
    # Fail loudly instead of walking forever: the parent of the filesystem
    # root is the root itself, so an unbounded upward walk would never stop.
    raise FileNotFoundError(
        f"could not find a '.gitignore' in {_start_dir} or any of its parent directories"
    )
# Make the repo importable; skip the insert when the cell is re-run.
if str(repo_path) not in sys.path:
    sys.path.insert(0, str(repo_path))

# Expose only GPU '0' to CUDA, with devices ordered by PCI bus id.
# Set here, ahead of the torch import further down in this cell.
os.environ.update(
    CUDA_DEVICE_ORDER='PCI_BUS_ID',
    CUDA_VISIBLE_DEVICES='0',
)

import numpy as np
import pandas as pd
import torch
from detectron2.structures import pairwise_iou, boxes

In [7]:
def small_box_removal(bboxes):
    """Drop every bbox that lies completely inside another bbox.

    Boxes are read as ``(x1, y1, x2, y2)`` corner coordinates. A box counts as
    inside another one when all four of its edges are within the other's edges
    (touching edges included), so exact duplicates are reduced to a single box.

    NOTE(review): the ``bbox`` column of the CSV loaded in this notebook looks
    like ``(x, y, w, h)`` -- confirm it is converted to corners before calling.

    Args:
        bboxes (iterable): 4-element boxes (lists, tuples, array rows, ...).

    Returns:
        boxes.Boxes: the surviving boxes, ordered by area, largest first.
    """
    def _area(box):
        return (box[2] - box[0]) * (box[3] - box[1])

    def _encloses(outer, inner):
        return (outer[0] <= inner[0] and outer[1] <= inner[1]
                and outer[2] >= inner[2] and outer[3] >= inner[3])

    # Plain float tuples keep the comparisons below well defined whatever the
    # element type of the input is (list, tuple, ndarray row, ...).
    candidates = [tuple(float(value) for value in box) for box in bboxes]
    # Largest first: an enclosing box is never smaller than the box it
    # encloses, so each box only has to be checked against the ones before it.
    candidates.sort(key=_area, reverse=True)

    survivors = []
    for box in candidates:
        # Containment is transitive, so checking against the surviving boxes is
        # enough: anything inside a dropped box is also inside the box that
        # caused the drop.
        if not any(_encloses(kept, box) for kept in survivors):
            survivors.append(box)
    # An explicit survivor list (instead of overwriting dropped boxes with a
    # [0, 0, 0, 0] marker) means a real all-zero box is never mistaken for a
    # removed one.
    return boxes.Boxes([list(box) for box in survivors])


def remove_similar_bboxes(bboxes, iou_threshold=0.5):
    """Remove boxes whose IoU with a larger box exceeds ``iou_threshold``.

    The boxes are sorted by area (largest first) and their pairwise IoU matrix
    is computed. A box is discarded when its IoU with any box sorted before it
    is above the threshold -- this includes earlier boxes that are themselves
    discarded. The first (largest) box is never discarded.

    Args:
        bboxes (boxes.Boxes): box object from detectron2, read as
            ``(x1, y1, x2, y2)`` corner coordinates.
        iou_threshold (float, optional): IoU above which two boxes are
            considered similar. Defaults to 0.5.

    Returns:
        boxes.Boxes: the remaining boxes, ordered by area, largest first.
    """
    # order bboxes by area, largest first
    by_area = sorted(bboxes, key=lambda box: (box[2] - box[0]) * (box[3] - box[1]), reverse=True)
    bboxes_ex = boxes.Boxes(np.asanyarray(by_area))
    # Keep only the strict upper triangle: entry (i, j) with i < j is the IoU
    # between box j and the larger box i; everything else is zeroed.
    iou_upper = torch.triu(pairwise_iou(bboxes_ex, bboxes_ex), diagonal=1)
    # Column j collects the IoU of box j with every box sorted before it, so a
    # single hit in the column marks the smaller box j (the higher index) as redundant.
    is_similar = (iou_upper > iou_threshold).any(dim=0)
    return bboxes_ex[~is_similar]


Some cases with mass-like lesions have bboxes that lie completely inside other bboxes.<br>
This is due to the definition we used for the bboxes, as well as the level of detail of several annotations.<br>
- For instance, in some cases polylines were annotated, resulting in well-defined segmentation masks but redundant bboxes.<br>

To account for this problem, we decided to restrict the bbox count per instance by eliminating two specific cases of bboxes:
1. Bboxes that are included inside of other bboxes.
    If a bbox is completely included inside of another bbox, we will remove the smaller bbox.
2. Bboxes that are very similar to other bboxes.
    If a bbox has an IoU with another bbox greater than <u> a threshold</u>, we will remove the smaller bbox.<br>
    The threshold should be defined by visual inspection, but an initial value of 0.6 was proposed.


In [12]:
# data source: image folder and bbox table of the CDD-CESM dataset
im_dir = repo_path.joinpath('data/CDD-CESM/images/substracted')
bbox_dataframe = pd.read_csv(repo_path.joinpath('data/CDD-CESM/masks/bbox_CESM.csv'))
# summary: total number of rows and number of distinct patients
print(
    f'The original full mass-like lesions dataframe has {len(bbox_dataframe)} rows',
    f'This represents a number of patients of {bbox_dataframe["patient_id"].nunique()}',
    sep='\n',
)

The original full mass-like lesions dataframe has 595 rows
This represents a number of patients of 131


In [11]:
# preview the loaded bbox table (rendered by the notebook as the cell output)
bbox_dataframe

Unnamed: 0,patient_id,image_name,region_id,bbox
0,2,P2_R_CM_CC,0,"(536, 2083, 355, 307)"
1,2,P2_R_CM_CC,1,"(633, 1620, 411, 283)"
2,2,P2_R_CM_CC,2,"(347, 1233, 307, 211)"
3,2,P2_R_CM_CC,3,"(629, 750, 129, 129)"
4,2,P2_R_CM_CC,4,"(738, 1161, 153, 153)"
...,...,...,...,...
590,323,P323_L_CM_MLO,1,"(72, 1893, 864, 569)"
591,325,P325_R_CM_CC,0,"(400, 593, 91, 103)"
592,325,P325_R_CM_CC,1,"(570, 1351, 129, 107)"
593,325,P325_R_CM_MLO,0,"(535, 916, 97, 103)"
