In [1]:
import cv2
import json
import time
import threading
import queue
import sys
import os

import numpy as np
import matplotlib as plt
import pathlib

from os import listdir
from os.path import isfile, join

In [2]:
### REQUIRED ###

# folder the COCO-style export is written to
target_dir = "I:/FARTS/COCO_json/COCO"
# folder containing the generated dataset (images, passes and json annotation files)
dataset_location = "I:/FARTS/COCO_json/antmulti"
# presumably the name of the annotator / scorer -- not read by the code shown here, TODO confirm
SCORER = "Fabi"

### OPTIONAL ###

# minimum fraction of a bounding box that has to be covered by the subject's mask;
# individuals at or below this value are treated as too occluded and are not exported
visibility_threshold = 0.05

# when True, the provided classes are ignored and all instances are grouped into one class
enforce_single_class = False

# flag for removing occluded keypoints (NOTE(review): not read by the code shown here -- confirm usage)
EXCLUDE_OCCLUDED_KEYPOINTS = True

# set True to show processing results for each image (meant to disable parallel processing)
DEBUG = False

In [3]:
# list every regular file of the dataset folder; sorting keeps the passes of one sample aligned by name
all_files = sorted(f for f in listdir(dataset_location) if isfile(join(dataset_location, f)))

# next, sort files into images, depth maps, segmentation maps, data, and colony info
# we only need the location and name of the data files, as all passes follow the same naming convention
dataset_data = []
dataset_img = []
dataset_ID = []
dataset_depth = []
dataset_norm = []
dataset_colony = None

for file in all_files:
    loc = dataset_location + "/" + file
    file_info = file.split("_")

    if len(file_info) < 2:
        # stray files without an underscore (e.g. OS metadata) do not follow the naming convention
        continue

    if file_info[1] == "BatchData":
        dataset_colony = loc

    elif len(file_info) == 2:
        # images are available in various formats, but annotation data is always written as json files
        if file_info[-1].split(".")[-1] == "json":
            dataset_data.append(loc)
        else:
            dataset_img.append(loc)

    else:
        # three or more name parts: the third part (without extension) names the render pass
        pass_name = file_info[2].split(".")[0]
        if pass_name == "ID":
            dataset_ID.append(loc)
        elif pass_name == "depth":
            dataset_depth.append(loc)
        elif pass_name == "norm":
            dataset_norm.append(loc)

print("Found",len(dataset_data),"samples...")

# next sort the colony info into its IDs to determine the colony size and individual scales
if dataset_colony is None:
    raise FileNotFoundError("No '*_BatchData*' colony file found in " + dataset_location)

# Opening colony (BatchData) JSON file; the context manager closes it even if parsing fails
with open(dataset_colony) as colony_file:
    # returns JSON object as a dictionary
    colony = json.load(colony_file)


""" !!! requires IDs, model names, scales !!! """

if not enforce_single_class:
    # get provided classes to create a dictionary of class IDs and class names
    all_classes = [colony["Subject Variations"][subject]["Class"]
                   for subject in colony["Subject Variations"]]

    # replace any spaces with underscores and keep each class once, in order of first appearance
    subject_class_names = list(dict.fromkeys(class_name.replace(" ", "_") for class_name in all_classes))

    # class IDs are the positions in the unique class list
    subject_classes = {str(class_name): class_id for class_id, class_name in enumerate(subject_class_names)}
else:
    # a single class named like the only key of subject_classes, so that category names stay strings
    subject_class_names = ["insect"]
    subject_classes = {"insect" : 0}

print("\nA total of",len(subject_class_names),"unique classes have been found.")
print("The classes and respective class IDs are:\n",subject_classes,"\n")


print("Loaded colony file with seed", colony['Seed']) #,"and",len(colony['ID']),"individuals.")

if len(colony['Subject Variations']) > 1:
    multi_animal = True
    print("Generating MULTI-animal dataset! Containing",len(colony['Subject Variations']),"individuals")
else:
    multi_animal = False
    print("Generating SINGLE-animal dataset!")

Found 1000 samples...

A total of 3 unique classes have been found.
The classes and respective class IDs are:
 {'atta_vollenweideri_9.8': 0, 'atta_vollenweideri_50.1': 1, 'atta_vollenweideri_1.1': 2} 

Loaded colony file with seed 1234567
Generating MULTI-animal dataset! Containing 100 individuals


Some animals may not use every bone of the rig, so we list all available labels and exclude the unwanted ones from the pose data. As all animals follow the same naming convention, it is sufficient to read in one example and remove the corresponding labels for all animals.

In [4]:
### REQUIRED ###
# specify which labels to ignore. By default, all keypoints are written into the dataset
# in this example we omit all keypoints relating to wings. Refer to the base_rig documentation for naming conventions
omit_labels = ['w_1_l', 'w_1_l_end', 'w_2_l', 'w_2_l_end', 'w_1_r', 'w_1_r_end', 'w_2_r', 'w_2_r_end', 'root']

# for simplicity we'll assume that at this stage all subjects use the same armature and therefore report the same keypoints
# we therefore load the first sample from the list and find the subjects keypoint hierarchy
if not dataset_data:
    raise FileNotFoundError("No annotation (json) files were found, cannot determine the keypoint labels.")

# the context manager closes the file even if the JSON cannot be parsed
with open(dataset_data[0]) as sample_file:
    # returns JSON object as a dictionary
    sample = json.load(sample_file)

# every entry of "subject Data" is a dictionary with the subject ID as its first key
first_entry_key = list(sample["iterationData"]["subject Data"][0].keys())[0]
labels = list(sample["iterationData"]["subject Data"][0][first_entry_key]["keypoints"].keys())

# show all used labels:
print("\nAll labels:  ",labels)

print("\nOmitting labels:  ", omit_labels)

# removing all occurences of omitted labels from the labels list to be used as keys below
labels = [x for x in labels if x not in omit_labels]

print("\nUsing labels:  ", labels)


All labels:   ['b_t', 'b_a_1', 'b_a_2', 'b_a_3', 'b_a_4', 'b_a_5', 'l_1_co_r', 'l_1_tr_r', 'l_1_fe_r', 'l_1_ti_r', 'l_1_ta_r', 'l_1_pt_r', 'l_2_co_r', 'l_2_tr_r', 'l_2_fe_r', 'l_2_ti_r', 'l_2_ta_r', 'l_2_pt_r', 'l_3_co_r', 'l_3_tr_r', 'l_3_fe_r', 'l_3_ti_r', 'l_3_ta_r', 'l_3_pt_r', 'w_1_r', 'w_2_r', 'l_1_co_l', 'l_1_tr_l', 'l_1_fe_l', 'l_1_ti_l', 'l_1_ta_l', 'l_1_pt_l', 'l_2_co_l', 'l_2_tr_l', 'l_2_fe_l', 'l_2_ti_l', 'l_2_ta_l', 'l_2_pt_l', 'l_3_co_l', 'l_3_tr_l', 'l_3_fe_l', 'l_3_ti_l', 'l_3_ta_l', 'l_3_pt_l', 'w_1_l', 'w_2_l', 'b_h', 'ma_r', 'an_1_r', 'an_2_r', 'an_3_r', 'ma_l', 'an_1_l', 'an_2_l', 'an_3_l']

Omitting labels:   ['w_1_l', 'w_1_l_end', 'w_2_l', 'w_2_l_end', 'w_1_r', 'w_1_r_end', 'w_2_r', 'w_2_r_end', 'root']

Using labels:   ['b_t', 'b_a_1', 'b_a_2', 'b_a_3', 'b_a_4', 'b_a_5', 'l_1_co_r', 'l_1_tr_r', 'l_1_fe_r', 'l_1_ti_r', 'l_1_ta_r', 'l_1_pt_r', 'l_2_co_r', 'l_2_tr_r', 'l_2_fe_r', 'l_2_ti_r', 'l_2_ta_r', 'l_2_pt_r', 'l_3_co_r', 'l_3_tr_r', 'l_3_fe_r', 'l_3_ti_r', 'l

Now that we have loaded the data and colony info, we can start plotting bounding boxes on top of their respective images.

In [5]:
# transform between sRGB and linear colour space (optional)

def to_linear(srgb):
    """Convert sRGB encoded values (0-255) to linear values, returned on a 0-255 scale.

    The input is cast to float32 and divided by 255. Normalised values up to
    0.04045 are divided by 12.92, all others follow the power curve with
    exponent 2.4. The result is multiplied by 255 again.

    srgb -- array-like of sRGB values; a new float32 array is returned
    """
    scaled = np.float32(srgb) / 255.0
    in_toe = scaled <= 0.04045
    in_curve = ~in_toe
    # both masks are disjoint, so each element is converted exactly once
    scaled[in_toe] /= 12.92
    scaled[in_curve] = np.power((scaled[in_curve] + 0.055) / 1.055, 2.4)
    return scaled * 255.0

    
def from_linear(linear):
    """Convert linear values to sRGB encoded values, returned on a 0-255 scale.

    Values up to 0.0031308 are multiplied by 12.92, all others follow the
    power curve with exponent 1/2.4. The result is multiplied by 255.

    linear -- numpy array of linear values (copied, the input is left untouched)

    NOTE(review): the thresholds are applied to the input as given, so the input
    appears to be expected in the 0-1 range, whereas to_linear returns values
    scaled to 0-255 -- confirm before chaining the two functions.
    """
    encoded = linear.copy()
    in_toe = linear <= 0.0031308
    in_curve = ~in_toe
    # both masks are disjoint, so the order of the two assignments does not matter
    encoded[in_curve] = 1.055 * np.power(linear[in_curve], 1.0 / 2.4) - 0.055
    encoded[in_toe] = linear[in_toe] * 12.92
    return encoded * 255.0

In [6]:
# let's create a big dictionary to store all our dataset info and
# then dump it into a sexy COCO-conform json file
# based on the documentation : https://www.immersivelimit.com/tutorials/create-coco-annotations-from-scratch
coco_data = {}

from datetime import datetime
date = datetime.today().strftime('%d.%m.%Y')

# edit any "info", "license", or "category" data here:
coco_data["info"] = {
        "description": "COCO_Style_FARTS_example_dataset",
        "url": "https://evo-biomech.ic.ac.uk/",
        "version": "1.0",
        "year": datetime.today().year,
        "contributor": "Fabian Plum, Rene Bulla, David Labonte",
        "date_created": date}

coco_data["licenses"] = [
        {
            "url": "http://creativecommons.org/licenses/by/4.0/",
            "id": 1,
            "name": "Attribution License"
        }
    ]

# we need to explicitly define the keypoint names and their relationship (skeleton)
# the order of the names fixes the indices used by the skeleton below
category_keypoints = ['b_t', 'b_a_1', 'b_a_2', 'b_a_3','b_a_4', 'b_a_5',
                      'l_1_co_r', 'l_1_tr_r', 'l_1_fe_r', 'l_1_ti_r', 'l_1_ta_r','l_1_pt_r',
                      'l_2_co_r', 'l_2_tr_r', 'l_2_fe_r', 'l_2_ti_r', 'l_2_ta_r', 'l_2_pt_r',
                      'l_3_co_r', 'l_3_tr_r', 'l_3_fe_r', 'l_3_ti_r', 'l_3_ta_r', 'l_3_pt_r',
                      'l_1_co_l', 'l_1_tr_l', 'l_1_fe_l', 'l_1_ti_l', 'l_1_ta_l', 'l_1_pt_l',
                      'l_2_co_l', 'l_2_tr_l', 'l_2_fe_l', 'l_2_ti_l', 'l_2_ta_l', 'l_2_pt_l',
                      'l_3_co_l', 'l_3_tr_l', 'l_3_fe_l', 'l_3_ti_l', 'l_3_ta_l', 'l_3_pt_l',
                      'b_h',
                      'ma_r',
                      'an_1_r', 'an_2_r', 'an_3_r',
                      'ma_l',
                      'an_1_l', 'an_2_l', 'an_3_l'
                     ]

# each pair connects two keypoints by their 1-based position in category_keypoints
category_skeleton = [
    [1,2],[2,3],[3,4],[4,5],[5,6],
    [1,7],[7,8],[8,9],[9,10],[10,11],[11,12],
    [1,13],[13,14],[14,15],[15,16],[16,17],[17,18],
    [1,19],[19,20],[20,21],[21,22],[22,23],[23,24],
    # left front leg: 27 ('l_1_fe_l') connects to 28 ('l_1_ti_l'), like every other leg chain
    [1,25],[25,26],[26,27],[27,28],[28,29],[29,30],
    [1,31],[31,32],[32,33],[33,34],[34,35],[35,36],
    [1,37],[37,38],[38,39],[39,40],[40,41],[41,42],
    [1,43],
    [1,44],
    [1,45],[45,46],[46,47],
    [1,48],
    [1,49],[49,50],[50,51]
]

# the annotations store their keypoints in the order of `labels`, so both lists have to agree
if list(labels) != category_keypoints:
    print("WARNING: the category keypoints differ from the labels used for the annotations!")

coco_data["categories"] = []

for s,sbj in enumerate(subject_class_names):
    coco_data["categories"].append(
            {
                "supercategory": "insect",
                "id": s + 1,  # category IDs start at 1
                "name": str(sbj),  # plain string, so the name can always be written to json
                # every category gets its own copies of the shared definitions
                "keypoints": list(category_keypoints),
                "skeleton": [list(pair) for pair in category_skeleton]
            }
        )
    
    
"""  

### COMPLETE SKELETON WHEN INCLUDING WINGS ###

for s,sbj in enumerate(subject_class_names):
    coco_data["categories"].append(
            {
                "supercategory": "insect",
                "id": s + 1,
                "name": sbj,
                "keypoints":['b_t', 'b_a_1', 'b_a_2', 'b_a_3',
                             'b_a_4', 'b_a_5', 'b_a_5_end', 'l_1_co_r',
                             'l_1_tr_r', 'l_1_fe_r',  'l_1_ti_r', 'l_1_ta_r', 
                             'l_1_pt_r', 'l_1_pt_r_end', 'l_2_co_r', 'l_2_tr_r', 
                             'l_2_fe_r', 'l_2_ti_r', 'l_2_ta_r', 'l_2_pt_r', 
                             'l_2_pt_r_end', 'l_3_co_r', 'l_3_tr_r', 'l_3_fe_r', 
                             'l_3_ti_r', 'l_3_ta_r', 'l_3_pt_r', 'l_3_pt_r_end',
                             'w_1_r', 'w_1_r_end',  'w_2_r', 'w_2_r_end',
                             'l_1_co_l', 'l_1_tr_l', 'l_1_fe_l', 'l_1_ti_l',
                             'l_1_ta_l', 'l_1_pt_l', 'l_1_pt_l_end', 'l_2_co_l', 
                             'l_2_tr_l', 'l_2_fe_l', 'l_2_ti_l', 'l_2_ta_l',
                             'l_2_pt_l', 'l_2_pt_l_end', 'l_3_co_l', 'l_3_tr_l',
                             'l_3_fe_l', 'l_3_ti_l', 'l_3_ta_l', 'l_3_pt_l',
                             'l_3_pt_l_end', 'w_1_l', 'w_1_l_end', 'w_2_l',
                             'w_2_l_end', 'b_h', 'ma_r', 'ma_r_end',
                             'an_1_r', 'an_2_r', 'an_3_r', 'an_3_r_end',
                             'ma_l', 'ma_l_end', 'an_1_l', 'an_2_l', 
                             'an_3_l', 'an_3_l_end'],
                "skeleton":[
                    [2,1],[3,2],[4,3],
                    [5,4],[6,5],[7,6],
                    [9,8],[10,9],[11,10],[12,11],
                    [13,12],[14,13],[16,15],
                    [17,16],[18,17],[19,18],[20,19],
                    [21,20],[23,22],[24,23],
                    [25,24],[26,25],[27,26],[28,27],
                    [30,29],[32,31],
                    [34,33],[35,34],[36,35],
                    [37,36],[38,37],[39,38],
                    [41,40],[42,41],[43,42],[44,43],
                    [45,44],[46,45],[48,47],
                    [49,48],[50,49],[51,50],[52,51],
                    [53,52],[55,54],
                    [57,56],[58,1],[59,58],[60,59],
                    [61,58],[62,61],[63,62],[64,63],
                    [65,58],[66,65],[67,58],[68,67],
                    [69,68],[70,69]
                ]
            }
        )
"""

# when adding images in the next step the following info needs to be given:
coco_data["images"] = []

# FORMATTING NOTES ["images"]

"""
"images": [
    {
        "id": ###### (-> generated ID, use i, needs to be the same as "image_id" of the annotations),
        "license": 1,
        "width": display_img.shape[1] (-> images read with OpenCV have the shape (height, width, channels)),
        "height": display_img.shape[0],
        "file_name": img.split('/')[-1][:-4] + "_synth" + ".JPG"
    },
    ...
"""

# FORMATTING NOTES ["annotations"]

"""
"annotations": [
    {
        "segmentation": [[x0,y0,x1,y1...xn,yn][x_0,y_0,...x_n,y_n]] (-> coordinates of mask outline, if separated, multiple arrays can be passed),
        "area": #### (-> = to the sum of pixels inside the mask),
        "iscrowd": 0 (as we treat all individuals separately),
        "image_id": # (-> = i when iterating over all images),
        "bbox": [bbox[0], bbox[1], bbox[2]-bbox[0], bbox[3]-bbox[1]] (-> unlike darknet the original (sub-)pixel values are used here),
        "category_id": 1 (-> for now there is only one category, replace with class ID for multi class),
        "id": ##### (-> separate counter to i and im),
        "keypoints": [x0,y0,v0,...xk,yk,vk] (-> one [x, y, visibility] triplet per keypoint of the category)
    },
"""

# each individual in the dataset is treated as a separate annotation with a corresponding image ID
coco_data["annotations"] = []

In [7]:
def getThreads():
    """Return the number of logical CPUs that can be used for worker threads.

    Uses os.cpu_count(), which works on Windows, Linux and macOS alike. If the
    count cannot be determined (os.cpu_count() returns None), 1 is returned so
    that at least one worker thread is created.
    """
    return os.cpu_count() or 1

class exportThread(threading.Thread):
    """Worker thread which runs process_detections on the queue it was given."""

    def __init__(self, threadID, name, q):
        """Store the thread's numeric ID, its display name and the job queue."""
        super().__init__()
        self.q = q
        self.name = name
        self.threadID = threadID

    def run(self):
        """Announce the start, process jobs until process_detections returns, announce the exit."""
        print(f"Starting {self.name}")
        process_detections(self.name, self.q)
        print(f"Exiting {self.name}")
        
def createThreadList(num_threads):
    """Return the names "Thread_0" ... "Thread_<num_threads - 1>" as a list."""
    return ["Thread_" + str(index) for index in range(num_threads)]

def _smoothed_contours(ID_mask, kernel_size):
    """Smooth a binary subject mask and trace its outer contours.

    ID_mask     -- binary (uint8) mask of one individual inside its bounding box
    kernel_size -- odd side length used for both the dilation kernel and the median blur

    Returns (smoothed mask, up to 4 largest contours, hierarchy, simplified contours).
    """
    # to simplify the generated masks and counter compression artifacts the original mask is dilated
    # https://docs.opencv.org/3.4/db/df6/tutorial_erosion_dilatation.html
    kernel = np.ones((kernel_size, kernel_size), 'uint8')
    ID_mask_dilated = cv2.dilate(ID_mask, kernel, iterations=1)
    # use median blur to further smooth the edges of the binary mask
    ID_mask_dilated = cv2.medianBlur(ID_mask_dilated, kernel_size)

    # use cv2.RETR_EXTERNAL instead of cv2.RETR_TREE to only return the outer most contours
    # depending on the version of openCV the function findContours additionally returns the image
    # as its first element, so only the last two return values are used
    contours, hierarchy = cv2.findContours(ID_mask_dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_TC89_KCOS)[-2:]

    # sort contours by area and only keep the 4 largest parts
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:4]

    # simplify the generated contours (polygon approximation) to decrease memory usage
    # decrease epsilon for finer contours
    contours_lowpoly = [cv2.approxPolyDP(contour, epsilon=1, closed=True) for contour in contours]

    return ID_mask_dilated, contours, hierarchy, contours_lowpoly


def _polygons_from_contours(contours_lowpoly, bbox, img_w, img_h):
    """Convert bounding-box relative contours into COCO polygons in image coordinates.

    Returns (segmentation_mask, new_bbox, mask_area):
    segmentation_mask -- list of flat [x0, y0, x1, y1, ...] polygons with at least 4 vertices
    new_bbox          -- [xmin, ymin, xmax, ymax] enclosing all contour points
    mask_area         -- summed area of all contours
    """
    segmentation_mask = []
    # start with an "inverted" box so that the first point always updates every side
    new_bbox = [img_w, img_h, 0, 0]
    mask_area = 0

    for contour in contours_lowpoly:
        mask_area += cv2.contourArea(contour)
        sub_mask = []
        for coords in contour:
            # contour points are relative to the bounding box, so shift them by its origin
            sub_mask_x = int(bbox[0] + coords[0,0])
            sub_mask_y = int(bbox[1] + coords[0,1])
            sub_mask.extend((sub_mask_x, sub_mask_y))

            new_bbox[0] = min(new_bbox[0], sub_mask_x)
            new_bbox[1] = min(new_bbox[1], sub_mask_y)
            new_bbox[2] = max(new_bbox[2], sub_mask_x)
            new_bbox[3] = max(new_bbox[3], sub_mask_y)

        if len(sub_mask) >= 8:
            # only include polygons with at least 4 vertices
            segmentation_mask.append(sub_mask)

    return segmentation_mask, new_bbox, mask_area


def _keypoints_for_subject(subject, seg_img, ind_ID, img_w, img_h):
    """Collect the COCO keypoint list [x, y, v, ...] of one individual, in the order of `labels`.

    "keypoints" are arrays of length 3K, K is the total number of key points defined for a class
    [x, y, v] with the key point visibility v:

    v=0   Indicates that this key point is not marked (in this case x=y=v=0)
    v=1   Indicates that this key point is marked but not visible (Obscured)
    v=2   Indicates that this key point is marked and visible at the same time
    """
    # let's binarise the image and dilate it to make sure all visible keypoints are found
    if not multi_animal:
        seg_bin = cv2.inRange(seg_img, np.array([0,0, 1]), np.array([0,0, 3]))
    else:
        seg_bin = cv2.inRange(seg_img, np.array([0,0, ind_ID - 1]), np.array([0,0, ind_ID + 1]))
    kernel = np.ones((5,5), np.uint8)
    seg_bin_dilated = cv2.dilate(seg_bin, kernel, iterations = 2)

    keypoints = []
    for label in labels:
        position = subject["keypoints"][label]["2DPos"]

        # x runs along the image width and y along its height; points on or beyond
        # the far edges cannot be looked up in the mask and count as outside
        if not (0 <= position["x"] < img_w and 0 <= position["y"] < img_h):
            keypoints.extend([0,0,0]) # x=y=v=0 -> ignore keypoint
            continue

        # the point is located within the image, so check its visibility
        x_temp = int(position["x"])
        y_temp = int(position["y"])
        if seg_bin_dilated[y_temp,x_temp] == 255:
            visibility_pt = 2 # point is visible
        else:
            visibility_pt = 1 # point is marked but obstructed

        keypoints.extend([x_temp,y_temp,visibility_pt])

    return keypoints


def _process_sample(i, data_loc, img, ID):
    """Process one sample: add its visible individuals and its image to coco_data.

    i        -- image ID of the sample
    data_loc -- path of the json annotation file
    img      -- path of the rendered image
    ID       -- path of the ID (segmentation) pass

    The image is only written to <target_dir>/data and registered in
    coco_data["images"] when at least one individual has been exported.
    """
    display_img = cv2.imread(img)
    seg_img = cv2.imread(ID)

    # cv2.imread returns None instead of raising when a file cannot be read
    if display_img is None or seg_img is None:
        print("Could not read image or segmentation pass for sample",data_loc.split("/")[-1],"!")
        return

    seg_img_display = seg_img.copy()

    with open(data_loc) as data_file:
        # returns JSON object as a dictionary
        data = json.load(data_file)

    # images read with OpenCV have the shape (height, width, channels)
    img_h, img_w = display_img.shape[:2]

    # the exported file and its entry in coco_data["images"] share this exact name
    file_name = os.path.splitext(os.path.basename(img))[0] + "_synth" + ".jpg"
    img_name = target_dir + "/data/" + file_name

    # the preview windows are only shown when a single worker thread is used
    show_preview = len(threadList_export) == 1

    # only add images that contain visibile individuals
    is_empty = True

    # check if the size of the image and segmentation pass match
    if display_img.shape != seg_img.shape:
        print("Size mismatch of image and segmentation pass for sample",data_loc.split("/")[-1],"!")
    else:
        for individual in data["iterationData"]["subject Data"]:
            ind_key = list(individual.keys())[0]
            ind_ID = int(ind_key)
            # WARNING ID numbering begins at 1
            subject = individual[ind_key]

            bbox_orig = [subject["2DBounds"]["xmin"],
                         subject["2DBounds"]["ymin"],
                         subject["2DBounds"]["xmax"],
                         subject["2DBounds"]["ymax"]]

            # fix_bounding_boxes limits x values by max_val[0] and y values by max_val[1]
            bbox = fix_bounding_boxes(bbox_orig, max_val=(img_w, img_h))

            # only process an individual if its bounding box width and height are not zero
            if bbox[2] - bbox[0] == 0 or bbox[3] - bbox[1] == 0:
                continue

            contours_lowpoly = []

            try:
                ID_mask = cv2.inRange(seg_img[bbox[1]:bbox[3],bbox[0]:bbox[2]], np.array([0, 0, ind_ID - 2]), np.array([0, 0, ind_ID + 2]))
                indivual_occupancy = cv2.countNonZero(ID_mask)

                # the kernel size for both dilation and median blur are to be determined by the bounding boxes relative size
                # values range from 0 (tiny) to 1 (huge)
                rel_size = ((bbox[2] - bbox[0]) / img_w + (bbox[3] - bbox[1]) / img_h) / 2
                rel_size_root = int(round((15 * rel_size)/2.)*2 + 1) # round to next odd integer

                ID_mask_dilated, contours, hierarchy, contours_lowpoly = _smoothed_contours(ID_mask, rel_size_root)

                if show_preview:
                    print("\nindividual",ind_ID,ID_mask_dilated.dtype)
                    print(hierarchy)
                    # draw the contours on a fresh copy of the segmentation pass
                    seg_img_display = seg_img.copy()
                    cv2.imshow("mask: ", ID_mask_dilated)
                    cv2.drawContours(seg_img_display[bbox[1]:bbox[3],bbox[0]:bbox[2]], contours, -1, (255,0,0), 3)
                    cv2.imshow("segmentation: ", seg_img_display[bbox[1]:bbox[3],bbox[0]:bbox[2]])
                    cv2.waitKey(1)

            except Exception as err:
                # best effort: report the problem and fall back to a minimal occupancy,
                # which normally leads to the individual being discarded below
                print("Could not compute the mask of individual",ind_ID,"in sample",data_loc.split("/")[-1],":",err)
                indivual_occupancy = 1

            # the + 1 keeps the area from becoming zero
            bbox_area = abs((bbox[2] - bbox[0]) * (bbox[3] - bbox[1])) + 1
            bbox_occupancy = indivual_occupancy / bbox_area

            if not enforce_single_class:
                class_ID = subject_classes[colony['Subject Variations'][ind_key]["Class"].replace(" ","_")]
            else:
                # here we use a single class, otherwise this can be replaced by size / scale values
                class_ID = 0

            # individuals which are barely visible, or any individual if no dataset is generated, are skipped
            if bbox_occupancy <= visibility_threshold or not generate_dataset:
                continue

            # now we need to convert all the info into the desired format.
            segmentation_mask, new_bbox, mask_area = _polygons_from_contours(contours_lowpoly, bbox, img_w, img_h)

            # an individual is only exported if it has at least one valid polygon of its own
            if not segmentation_mask:
                continue

            is_empty = False
            keypoints = _keypoints_for_subject(subject, seg_img, ind_ID, img_w, img_h)

            coco_data["annotations"].append({
                    "segmentation": segmentation_mask, # (-> coordinates of mask outline, if separated, multiple arrays can be passed),
                    "area": mask_area, # (-> summed area of the contours),
                    "iscrowd": 0, #(as we treat all individuals separately),
                    "image_id":  i, # (-> = i when iterating over all images),
                    "bbox": [new_bbox[0], new_bbox[1], new_bbox[2]-new_bbox[0], new_bbox[3]-new_bbox[1]], # (-> [x, y, width, height] of the refined box),
                    "category_id": class_ID + 1, # (-> classes start at 1 in COCO, there is only one "supercategory" for now),
                    "id": int(str(i) + "000" + str(ind_ID)), # (-> joining i and ind_ID)
                    "keypoints": keypoints
                    })

    # show resulting bounding boxes and masks at half size
    if show_preview:
        cv2.imshow("segmentation: " ,cv2.resize(seg_img_display, (int(seg_img.shape[1] / 2),
                                                          int(seg_img.shape[0] / 2))))
        cv2.imshow("labeled image", cv2.resize(display_img, (int(display_img.shape[1] / 2),
                                                             int(display_img.shape[0] / 2))))
        cv2.waitKey(1)

    if not is_empty:
        coco_data["images"].append({
                "id": i,
                "license": 1,
                "width": img_w,
                "height": img_h,
                "file_name": file_name
            }
        )
        cv2.imwrite(img_name, display_img)
        print("Saved", img_name)


def process_detections(threadName, q):
    """Worker loop: take samples from the queue and process them until exitFlag_export is set.

    threadName -- name of the calling thread (not used by the processing itself)
    q          -- queue of [i, data_loc, img, ID] jobs

    The queue is only accessed while queueLock is held; the lock is released
    before the (slow) processing of a sample starts.
    """
    while not exitFlag_export:
        with queueLock:
            data_input = None if q.empty() else q.get()

        if data_input is None:
            # nothing to do right now: pause briefly instead of spinning on the lock
            time.sleep(0.01)
            continue

        i, data_loc, img, ID = data_input
        _process_sample(i, data_loc, img, ID)
            
# setup as many threads as there are (virtual) CPUs
# DEBUG limits the export to a single thread, which is also the condition for
# process_detections to show its processing results for each image
exitFlag_export = 0
threadList_export = createThreadList(1 if DEBUG else getThreads())
print("Using", len(threadList_export), "threads for export...")
queueLock = threading.Lock()

# define paths to all images and set the maximum number of items in the queue equivalent to the number of images
workQueue_export = queue.Queue(len(dataset_img))
threads = []
threadID = 1

# fixed seed, so that every run assigns the same colour to the same ID
np.random.seed(seed=1)
ID_colours = np.random.randint(255, size=(255, 3))

font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
lineType = 2

# we can additionally plot the points in the data files to check joint locations
plot_joints = True

# remember to refine an export folder when saving out your dataset
generate_dataset = True

def fix_bounding_boxes(coords, max_val=(1024, 1024)):
    """Clamp bounding box coordinates so they do not reach beyond the image.

    The coordinates at index 0 and 2 are limited by ``max_val[0]``; every
    other index is limited by ``max_val[1]``. A value at or above its limit
    becomes the limit, a value at or below zero becomes 0, and every result
    is truncated to ``int``.

    Args:
        coords: iterable of numeric coordinates (presumably
            ``[x_min, y_min, x_max, y_max]`` -- confirm against the callers).
        max_val: two-element sequence holding the upper bounds. The default
            is a tuple rather than a list so it cannot be mutated between
            calls; lists passed by existing callers keep working.

    Returns:
        A new list of ints with one clamped entry per input coordinate.
    """
    fixed_coords = []
    for c, coord in enumerate(coords):
        # positions 0 and 2 share the first limit, all others use the second
        upper_limit = max_val[0] if c in (0, 2) else max_val[1]

        if coord >= upper_limit:
            coord = upper_limit
        elif coord <= 0:
            coord = 0

        fixed_coords.append(int(coord))

    return fixed_coords

# wall-clock start, reported at the end of the export
timer = time.time()

# create output folder for used images
# makedirs also creates a missing target_dir and does not fail if the folder already exists
os.makedirs(target_dir + "/data", exist_ok=True)

# Create new threads
for tName in threadList_export:
    thread = exportThread(threadID, tName, workQueue_export)
    thread.start()
    threads.append(thread)
    threadID += 1

# Fill the queue with samples
# the context manager guarantees the lock is released even if put() raises
with queueLock:
    for i, (data, img, ID) in enumerate(zip(dataset_data, dataset_img, dataset_ID)):
        workQueue_export.put([i, data, img, ID])

# Wait for queue to empty
# sleep between polls so the main thread does not spin and compete with the workers for CPU time
while not workQueue_export.empty():
    time.sleep(0.05)

# Notify threads it's time to exit
exitFlag_export = 1

# Wait for all threads to complete
for t in threads:
    t.join()
print("Exiting Main export Thread")

# close all windows if they were opened
cv2.destroyAllWindows()

print("Total time elapsed:",time.time()-timer,"seconds")

Using 28 threads for export...
Starting Thread_0
Starting Thread_1
Starting Thread_2
Starting Thread_3
Starting Thread_4
Starting Thread_5
Starting Thread_6
Starting Thread_7
Starting Thread_8
Starting Thread_9
Starting Thread_10
Starting Thread_11
Starting Thread_12
Starting Thread_13
Starting Thread_14
Starting Thread_15
Starting Thread_16
Starting Thread_17
Starting Thread_18
Starting Thread_19
Starting Thread_20
Starting Thread_21
Starting Thread_22
Starting Thread_23
Starting Thread_24
Starting Thread_25
Starting Thread_26
Starting Thread_27
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0005_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0007_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0016_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0028_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0010_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0000_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0002_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0138_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0126_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0156_synth.jpg
Saved Saved I:/FARTS/COCO_json/COCO/data/antmulti_0081_synth.jpg
I:/FARTS/COCO_json/COCO/data/antmulti_0120_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0112_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0125_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0160_synth.jpg
Saved SavedI:/FARTS/COCO_json/COCO/data/antmulti_0130_synth.jpg 
I:/FARTS/COCO_json/COCO/data/antmulti_0146_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0140_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0149_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0158_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0086_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0159_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0163_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0161_synth.j

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0285_synth.jpg
SavedSaved I:/FARTS/COCO_json/COCO/data/antmulti_0291_synth.jpg
 I:/FARTS/COCO_json/COCO/data/antmulti_0245_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0237_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0296_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0275_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0298_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0299_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0269_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0292_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0255_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0301_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0302_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0303_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0260_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0300_synth.jpg
Saved Saved I:/FARTS/COCO_json/COCO/data/antmulti_0258_s

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0328_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0436_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0422_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0402_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0424_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0435_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0428_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0441_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0405_synth.jpg
SavedSaved I:/FARTS/COCO_json/COCO/data/antmulti_0400_synth.jpg
 I:/FARTS/COCO_json/COCO/data/antmulti_0433_synth.jpg
SavedSaved I:/FARTS/COCO_json/COCO/data/antmulti_0429_synth.jpg
 I:/FARTS/COCO_json/COCO/data/antmulti_0440_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0442_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0443_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0438_synth.jpg
SavedSaved I:/FARTS/COCO_json/COCO/data/antmulti_0415_sy

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0554_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0568_synth.jpg
SavedSaved I:/FARTS/COCO_json/COCO/data/antmulti_0555_synth.jpg
 I:/FARTS/COCO_json/COCO/data/antmulti_0575_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0573_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0564_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0583_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0547_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0578_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0527_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0566_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0522_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0574_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0551_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0582_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0579_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0588_synth.j

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0715_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0704_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0653_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0714_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0709_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0723_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0687_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0707_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0686_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0724_synth.jpg
SavedSaved I:/FARTS/COCO_json/COCO/data/antmulti_0684_synth.jpg
 I:/FARTS/COCO_json/COCO/data/antmulti_0708_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0721_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0659_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0730_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0726_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0727_synth.j

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0822_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0853_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0845_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0841_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0851_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0855_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0784_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0816_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0844_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0858_synth.jpg
SavedSaved  I:/FARTS/COCO_json/COCO/data/antmulti_0842_synth.jpg
I:/FARTS/COCO_json/COCO/data/antmulti_0859_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0813_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0833_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0863_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0866_synth.jpg
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0869_synth.j

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0988_synth.jpg
Exiting Thread_6
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0994_synth.jpg
Exiting Thread_13
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0983_synth.jpg
Exiting Thread_20
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0975_synth.jpg
Exiting Thread_9
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0981_synth.jpg
Exiting Thread_19
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0971_synth.jpg
Exiting Thread_10
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0978_synth.jpg
Exiting Thread_7
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0945_synth.jpg
Exiting Thread_0
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0966_synth.jpg
Exiting Thread_11
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0965_synth.jpg
Exiting Thread_18
Saved I:/FARTS/COCO_json/COCO/data/antmulti_0964_synth.jpg
Exiting Thread_5Saved I:/FARTS/COCO_json/COCO/data/antmulti_0992_synth.jpg
Exiting Thread_22

Saved I:/FARTS/COCO_json/COCO/data/antmulti_0993_synth.jpg
Exiting Thread_16
Save

Now, dump everything into a single **COCO-style JSON** file.

In [8]:
# write the assembled COCO dictionary to labels.json inside target_dir
labels_path = target_dir + '/labels.json'
with open(labels_path, mode='w', encoding='utf-8') as labels_file:
    json.dump(coco_data, fp=labels_file, indent=4, ensure_ascii=False)