In [None]:
from PIL import Image
import numpy as np                         
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import json
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; the dataset
# archive referenced below is read from that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
path = "/content/drive/MyDrive/Machine learning/data.zip"
!unzip "/content/drive/MyDrive/Machine learning/data.zip"

In [None]:
def create_sub_masks(mask_image):
    """Split a colour-coded mask image into one binary sub-mask per colour.

    Parameters
    ----------
    mask_image : PIL.Image.Image
        Mask on a white background; every non-white pixel is assigned to the
        sub-mask of its RGB colour. The image is normalised to RGB first, so
        palette, greyscale and RGBA inputs are accepted as well.

    Returns
    -------
    dict
        Maps ``str((r, g, b))`` to a mode ``'1'`` image of size
        ``(width + 2, height + 2)`` in which the pixels of that colour are
        set to 1, shifted by one pixel in x and y.
    """
    width, height = mask_image.size

    # getpixel() returns a bare int for palette ('P') and greyscale ('L')
    # images, so slicing it with [:3] raises TypeError. Converting to RGB
    # first guarantees 3-tuples; for RGBA it drops alpha, like [:3] did.
    rgb_image = mask_image.convert('RGB')
    rgb_pixels = rgb_image.load()

    # Sub-masks indexed by the string form of their RGB colour
    sub_masks = {}
    for x in range(width):
        for y in range(height):
            pixel = rgb_pixels[x, y]

            # White is background and belongs to no sub-mask
            if pixel == (255, 255, 255):
                continue

            pixel_str = str(pixel)
            sub_mask = sub_masks.get(pixel_str)
            if sub_mask is None:
                # One bit per pixel, with 1 pixel of padding in each
                # direction because the contours module doesn't handle
                # shapes that bleed to the edge of the image
                sub_mask = Image.new('1', (width + 2, height + 2))
                sub_masks[pixel_str] = sub_mask

            # Set the pixel to 1 (default is 0), accounting for padding
            sub_mask.putpixel((x + 1, y + 1), 1)

    return sub_masks

In [None]:
def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    """Build one COCO-style annotation per contour found in a binary sub-mask.

    Parameters
    ----------
    sub_mask : PIL.Image.Image or array-like
        2-D binary mask carrying 1 pixel of padding on every side (the
        padding is subtracted from the resulting coordinates).
    image_id
        Stored as ``'image_id'`` in every annotation.
    category_id
        Stored as ``'category_id'`` in every annotation.
    annotation_id : int
        Id given to the first annotation; later ones count up from it.
    is_crowd
        Stored as ``'iscrowd'`` in every annotation.

    Returns
    -------
    (list of dict, int)
        The annotations and how many were created, so the caller can advance
        its running annotation id by that amount.
    """
    # Recent scikit-image releases read .shape/.ndim straight from the input,
    # which a PIL image does not provide, so hand over a float array.
    mask_array = np.asarray(sub_mask, dtype=np.float64)

    # Marching squares: iso-valued contours of the 2-D array at level 0.5
    contours = measure.find_contours(mask_array, 0.5, positive_orientation='low')

    annotations = []
    for contour in contours:
        # A polygon ring needs at least three vertices
        if len(contour) < 3:
            continue

        # Flip from (row, col) to (x, y) and subtract the padding pixel
        xy = contour[:, ::-1] - 1

        # Make a polygon and simplify it. With preserve_topology=False the
        # result may be empty or split into several parts.
        poly = Polygon(xy).simplify(1.0, preserve_topology=False)
        parts = getattr(poly, 'geoms', [poly])

        for part in parts:
            if part.is_empty or part.geom_type != 'Polygon':
                continue

            min_x, min_y, max_x, max_y = part.bounds
            outline = np.array(part.exterior.coords).ravel().tolist()
            annotations.append({
                # COCO stores polygons as a list of flat [x1, y1, x2, y2, ...] lists
                'segmentation': [outline],
                'iscrowd': is_crowd,
                'image_id': image_id,
                'category_id': category_id,
                'id': annotation_id,
                # COCO bbox is [x, y, width, height], not (minx, miny, maxx, maxy)
                'bbox': [min_x, min_y, max_x - min_x, max_y - min_y],
                'area': part.area,
            })
            annotation_id += 1

    return annotations, len(annotations)
        
        

In [None]:
# Mask files excluded from annotation: the processing loop skips every path
# that appears in this list.
errors = [
    "./data/lesion/P009/10014.bmp",
    "./data/lesion/P006/10011.bmp",
    "./data/lesion/P058/10006.bmp",
    "./data/lesion/P058/10016.bmp",
    "./data/lesion/P032/10013.bmp",
    "./data/lesion/P028/10007.bmp",
    "./data/lesion/P013/10011.bmp",
    "./data/lesion/P016/10012.bmp",
    "./data/lesion/P016/10018.bmp",
]

In [8]:
# Skeleton of the COCO dataset file; images, annotations and categories are
# filled in by later cells.
coco_annotations = {
    'info': {
        'description': 'Dataset of brain lesions',
        'url': "",
        'year': 2020,
        "contributor": "Dr. Martin Tabakow",
        "date_created": ""
    },
    # COCO defines 'licenses' as a list of license objects; image entries
    # refer to one of them through their 'license' field (id 0 here).
    'licenses': [
        {
            'url': "",
            "id": 0,
            'name': ""
        }
    ],
    'images': [],
    'annotations': [],
    'categories': []
}

In [16]:
# Lesion categories of the dataset.
lesion_categories = [
    {
        'supercategory': 'brain damage',
        'id': 1,
        'name': 'cortical lesion'
    },
    {
        'supercategory': 'brain damage',
        'id': 2,
        'name': 'subcortical lesion'
    },
]

# Register only the ids that are not present yet, so running this cell more
# than once does not leave duplicate category entries behind.
registered_ids = {category['id'] for category in coco_annotations['categories']}
coco_annotations['categories'].extend(
    category for category in lesion_categories
    if category['id'] not in registered_ids
)

In [18]:
import os
from tqdm import tqdm

data_path = "./data/"
out_path = "brain_annotations.json"

# --- Collect every .bmp file below data_path -------------------------------
# `paths` is initialised here so the cell runs on a fresh kernel, and the
# image list is rebuilt (not appended to) so a re-run creates no duplicates.
paths = []
images = []
idn = 1
for root, dirs, files in os.walk(data_path, topdown=True):
    # os.walk yields entries in filesystem order; sorting keeps the assigned
    # ids stable between runs and machines.
    dirs.sort()
    for filename in sorted(files):
        if not filename.endswith('.bmp'):
            continue
        paths.append(os.path.join(root, filename))
        images.append({
            'license': 0,
            'file_name': filename,
            # assumes every image is 512x512 -- TODO confirm against the data
            'width': 512,
            'height': 512,
            'id': idn
        })
        idn += 1
coco_annotations['images'] = images

# --- Create annotations for every mask that is not excluded ----------------
lesion_id = 1
is_crowd = 0
annotation_id = 1
error_paths = []
annotations = []
excluded_paths = set(errors)

for mask_image_path in tqdm(paths):
    if mask_image_path in excluded_paths:
        continue

    # "<parent directory>.<file stem>", e.g. "P009.10014".
    # NOTE(review): this string does not match the integer 'id' of the
    # entries in coco_annotations['images'] -- confirm which key consumers
    # of the JSON file are expected to join on.
    parent_dir = os.path.basename(os.path.dirname(mask_image_path))
    file_stem = os.path.basename(mask_image_path).split('.')[0]
    image_id = parent_dir + "." + file_stem

    image_annotations = []
    next_annotation_id = annotation_id
    try:
        # Opening the file is inside the try block so an unreadable image is
        # recorded in error_paths instead of aborting the whole run.
        with Image.open(mask_image_path) as mask_image:
            sub_masks = create_sub_masks(mask_image)
        for sub_mask in sub_masks.values():
            # NOTE(review): every colour is mapped to category 1 although
            # two categories are defined -- confirm the colour mapping.
            category_id = lesion_id
            sub_annotations, steps = create_sub_mask_annotation(
                sub_mask, image_id, category_id, next_annotation_id, is_crowd)
            image_annotations.extend(sub_annotations)
            next_annotation_id += steps
    except Exception as e:
        print(e, image_id)
        error_paths.append(mask_image_path)
        continue

    # Commit only after the whole image succeeded, so a failing image leaves
    # neither partial annotations nor gaps in the id sequence.
    annotations.extend(image_annotations)
    annotation_id = next_annotation_id

# --- Write the dataset once -------------------------------------------------
# The annotations are stored as one flat list and the file is written a
# single time instead of being re-read and rewritten for every image.
coco_annotations['annotations'] = annotations
with open(out_path, 'w') as json_file:
    json.dump(coco_annotations, json_file)

In [None]:
# Display the mask paths whose processing raised an exception in the
# annotation loop.
error_paths