In [8]:
import os
import torch

# Import some common libraries
import numpy as np
import pandas as pd
import cv2

# Import omidb
import omidb

# Import detectron2, detectron2 logger
import detectron2
from detectron2.structures import BoxMode
import json
from pycocotools import coco, mask

In [9]:
def _parse_bbox_string(bbox_text):
    """Extract the four coordinates from a serialized bounding box.

    The text is expected to look like
    ``BoundingBox(x1=.., y1=.., x2=.., y2=..)``: the 12-character
    ``BoundingBox(`` prefix and the trailing ``)`` are stripped, and every
    comma-separated ``name=value`` field contributes its value in order of
    appearance.

    Values are parsed as floats and rounded to the nearest integer, so both
    integer and fractional coordinates are accepted.

    Args:
        bbox_text: Serialized bounding box string.

    Returns:
        A list ``[x1, y1, x2, y2]`` of Python ints.

    Raises:
        ValueError: If the text does not contain exactly four fields or a
            value is not numeric.
    """
    fields = bbox_text[12:-1].split(',')
    if len(fields) != 4:
        raise ValueError(
            "expected 4 bounding box fields, got %d in %r" % (len(fields), bbox_text)
        )
    return [int(round(float(field.split('=')[1]))) for field in fields]


def get_omidb_dicts(csv_dir, image_dir="stacked_without_difference_image"):
    """Build Detectron2-style dataset dicts from a lesion CSV file.

    One record is produced per CSV row, holding ``file_name``, ``image_id``
    (the row index), ``height``, ``width`` and a single-element
    ``annotations`` list describing the lesion ROI (category 0).

    Rows with ``side == 'R'`` take the image size from the ``bbox`` column
    and express ``bbox_roi`` relative to the top-left corner of ``bbox``
    (presumably because those images are cropped to the breast box -- TODO
    confirm). All other rows read the image from ``image_dir`` to get its
    size and use ``transformed_bbox_roi`` as-is.

    Args:
        csv_dir: Path (or file-like object) of the CSV, passed to
            ``pd.read_csv``. Required columns: ``filename``, ``side``,
            ``bbox``, ``bbox_roi`` and ``transformed_bbox_roi`` (the last
            three only for the rows that use them).
        image_dir: Directory joined with ``filename`` to read non-right-side
            images. Defaults to the directory the notebook originally used.

    Returns:
        A list of dicts, one per CSV row.

    Raises:
        OSError: If an image needed to determine the size cannot be read.
        ValueError: If a bounding box string is malformed.
    """
    df = pd.read_csv(csv_dir)

    dataset_dicts = []
    for idx, row in df.iterrows():
        filename = row["filename"]
        record = {"file_name": filename, "image_id": idx}

        if row['side'] == 'R':
            # Bounding box of the breast area: gives the image size and the
            # origin the ROI is shifted to.
            bx1, by1, bx2, by2 = _parse_bbox_string(row["bbox"])
            record["height"] = by2 - by1
            record["width"] = bx2 - bx1

            # ROI in original coordinates, re-expressed relative to the
            # breast box origin.
            rx1, ry1, rx2, ry2 = _parse_bbox_string(row["bbox_roi"])
            x1, y1, x2, y2 = rx1 - bx1, ry1 - by1, rx2 - bx1, ry2 - by1
        else:
            read_path = os.path.join(image_dir, filename)
            im = cv2.imread(read_path)
            if im is None:
                # cv2.imread signals failure by returning None, not raising.
                raise OSError("could not read image: %s" % read_path)
            record["height"], record["width"] = im.shape[:2]

            # ROI already expressed in the coordinates of the stored image.
            x1, y1, x2, y2 = _parse_bbox_string(row["transformed_bbox_roi"])

        # Rectangle polygon (clockwise from the top-left corner), shifted by
        # half a pixel, flattened to [x, y, x, y, ...].
        poly = [
            x1 + 0.5, y1 + 0.5,
            x2 + 0.5, y1 + 0.5,
            x2 + 0.5, y2 + 0.5,
            x1 + 0.5, y2 + 0.5,
        ]

        record["annotations"] = [
            {
                "bbox": [x1, y1, x2 - x1, y2 - y1],  # [x, y, w, h] format
                # The box above is x/y/width/height, so the mode must be
                # XYWH_ABS (it was previously mislabelled as XYXY_ABS).
                "bbox_mode": BoxMode.XYWH_ABS,
                "segmentation": [poly],
                "category_id": 0,
            }
        ]
        dataset_dicts.append(record)
    return dataset_dicts

In [10]:
class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to built-in Python types.

    Handles NumPy integers, floats, booleans and arrays; anything else is
    deferred to ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``."""
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.bool_):
            # np.bool_ is not a subclass of bool, so json rejects it as-is.
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [4]:
# Destination of the COCO-style annotation file written at the end
output_path = "validation_annotations_multiview_without_diff.json"

# Parse the validation CSV into one record per row
dataset_dicts = get_omidb_dicts(
    csv_dir="/home/habtamu/Mammogram_Registration_Four_Resolution/transformed_lesion_validation_set.csv"
)

# Empty COCO object; the following cells fill in its `dataset` dict
coco_data = coco.COCO()

# Lookup from category name to category id
category_map = dict(lesion=0)

In [5]:
# Single-class dataset: the only category is "lesion" with id 0
categories = [dict(id=0, name="lesion")]

# Register the category list on the COCO dataset
coco_data.dataset.update(categories=categories)

In [6]:
# Initialize the images and annotations lists in the COCO dataset.
# Assigning fresh lists (instead of appending to existing ones) keeps this
# cell idempotent when it is re-run.
coco_data.dataset["images"] = []
coco_data.dataset["annotations"] = []

# Add the images and annotations to the COCO dataset
for image_dict in dataset_dicts:
    image_id = image_dict["image_id"]
    # Sizes may be NumPy integers; cast so they are plain ints downstream.
    image_width = int(image_dict["width"])
    image_height = int(image_dict["height"])
    image_file_name = image_dict["file_name"]
    coco_data.dataset["images"].append({
        "id": image_id,
        "width": image_width,
        "height": image_height,
        "file_name": image_file_name
    })

    for annotation_dict in image_dict.get("annotations", []):
        # Use the category stored on the annotation; fall back to 0, the
        # only category this notebook defines.
        category_id = annotation_dict.get("category_id", 0)
        bbox = annotation_dict["bbox"]
        segmentation = annotation_dict["segmentation"]
        # Rasterize the polygon(s) to RLE to measure the area in pixels;
        # only the first polygon's area is used.
        segmentation_mask = mask.frPyObjects(segmentation, image_height, image_width)
        area = int(mask.area(segmentation_mask)[0])
        coco_data.dataset["annotations"].append({
            # Annotation ids start at 1: pycocotools' COCOeval uses a matched
            # ground-truth id of 0 to mean "no match", so an annotation with
            # id 0 would never be counted as detected.
            "id": len(coco_data.dataset["annotations"]) + 1,
            "image_id": image_id,
            "category_id": category_id,
            "bbox": bbox,
            "area": area,
            "segmentation": segmentation,
            "iscrowd": 0
        })

In [7]:
# Write the assembled COCO dictionary to disk; NpEncoder converts NumPy values
with open(output_path, mode="w") as json_file:
    json.dump(coco_data.dataset, json_file, cls=NpEncoder)