# This script is used to automatically generate datasets formatted to be interpretable by DeepLabCut.

**WARNING:** Restrict the **colony size** to a **maximum of 20 individuals**; otherwise dataset generation will fail, because the number of required columns would exceed the 64 kB metadata limit of the exported HDF5 file!

Ensure that your **GPU** has sufficient **memory** for the chosen resolution!

E.g. at 1024 px X 1024 px, training on an RTX 2080 Ti, set the **batchsize** to 4 in the **pose_cfg.yaml** file of your model.

In [1]:
import cv2
import pathlib
import json

import numpy as np
import pandas as pd
import matplotlib as plt

from os import listdir
from os.path import isfile, join

In [2]:
### REQUIRED ###

# folder holding the generated dataset (images, passes, and JSON annotation files)
dataset_location = "I:/FARTS/LATEST/multi-ant-pose1"
# folder into which the cropped images and the CollectedData files are written
# NOTE(review): the index entries written later start with "labeled-data/" (hyphen),
# while this path contains "labeled_data" (underscore) - confirm this is intended
target_dir = "I:/FARTS/DLC_ant_pose_mixed/labeled_data/synth"
# name used for the "scorer" level of the exported column index
SCORER = "Fabi"

### OPTIONAL ###

# when True, all instances are grouped into the single class "insect"
enforce_single_class = True

# when True, keypoints that are not visible in the segmentation pass are dropped
EXCLUDE_OCCLUDED_KEYPOINTS = True

# when True, every keypoint rejected by the occlusion test is shown in an OpenCV window
# that waits for a key press, so this is only practical for small test runs
DEBUG = False

In [3]:
# collect every regular file of the dataset folder; sorting keeps the passes of one frame
# at the same index in each of the lists below
all_files = sorted(f for f in listdir(dataset_location) if isfile(join(dataset_location, f)))

# next, sort files into images, depth maps, segmentation maps, data, and colony info
# we only need the location and name of the data files, as all passes follow the same naming convention
dataset_data = []
dataset_img = []
dataset_ID = []
dataset_depth = []
dataset_norm = []
dataset_colony = None

for file in all_files:
    # paths are joined with "/" on purpose: later cells split them on "/" again
    loc = dataset_location + "/" + file
    file_info = file.split("_")

    # files without any underscore do not follow the naming convention - skip them
    # instead of failing with an IndexError below
    if len(file_info) < 2:
        continue

    if file_info[1] == "BatchData":
        dataset_colony = loc

    elif len(file_info) == 2:
        # images are available in various formats, but annotation data is always written as json files
        if file_info[-1].split(".")[-1] == "json":
            dataset_data.append(loc)
        else:
            dataset_img.append(loc)

    else:
        # three or more name parts: the third part names the render pass
        pass_name = file_info[2].split(".")[0]
        if pass_name == "ID":
            dataset_ID.append(loc)
        elif pass_name == "depth":
            dataset_depth.append(loc)
        elif pass_name == "norm":
            dataset_norm.append(loc)

print("Found",len(dataset_data),"samples...")

# the export step pairs annotation, image, and ID pass by position and silently stops
# at the shortest list, so make a mismatch visible here
if not (len(dataset_data) == len(dataset_img) == len(dataset_ID)):
    print("WARNING: unequal number of annotation files (", len(dataset_data),
          "), images (", len(dataset_img), "), and ID passes (", len(dataset_ID), ")!")

# next sort the colony info into its IDs to determine the colony size and individual scales
if dataset_colony is None:
    raise FileNotFoundError("No colony (BatchData) file found in " + dataset_location)

# Opening colony (BatchData) JSON file; json.load returns the JSON object as a dictionary
with open(dataset_colony) as colony_file:
    colony = json.load(colony_file)


# !!! requires IDs, model names, scales !!!


if not enforce_single_class:
    # get provided classes to create a dictionary of class IDs and class names
    subject_class_names = np.unique(np.array(colony["Subject Variations"]))
    subject_classes = {str(sbj): class_id for class_id, sbj in enumerate(subject_class_names)}
else:
    subject_class_names = np.array([0])
    subject_classes = {"insect" : 0}

print("\nA total of",len(subject_class_names),"unique classes have been found.")
print("The classes and respective class IDs are:\n",subject_classes,"\n")


print("Loaded colony file with seed", colony['Seed']) #,"and",len(colony['ID']),"individuals.")

multi_animal = False # focussing only one animal in each image
print("Generating Single-animal dataset! Containing",len(colony['Subject Variations']),"individuals")

Found 10000 samples...

A total of 1 unique classes have been found.
The classes and respective class IDs are:
 {'insect': 0} 

Loaded colony file with seed 12345
Generating Single-animal dataset! Containing 20 individuals


Now that we have the cleaned colony info, we can start loading the data associated with each frame.
For simplicity, we store this as a list of lists, with one entry per individual.

We will therefore access "data" as [frame] [individual] [attribute], where attributes will include [ID,bbox_x_0,bbox_y_0,...]

To train a multi-animal DeepLabCut network, we mostly care about joint positions and less about bounding boxes.

As there may be animals for which we don't use all bones we can return a list of all labels and exclude the respective locations from the pose data. As all animals use the same convention, we can simply read in one example and remove the corresponding indices from all animals.

In [4]:
### REQUIRED ###
# specify which labels to ignore. By default, all keypoints are written into the dataset
# in this example we omit all keypoints relating to wings. Refer to the base_rig documentation for naming conventions
omit_labels = ['w_1_l', 'w_1_l_end', 'w_2_l', 'w_2_l_end', 'w_1_r', 'w_1_r_end', 'w_2_r', 'w_2_r_end', 'root']

if not dataset_data:
    raise FileNotFoundError("No annotation (json) files were found, cannot retrieve skeleton info")

# loading the first entry of first iteration file to retrieve skeleton info
# (the context manager closes the file even if parsing fails)
with open(dataset_data[0]) as exp_file:
    exp_data = json.load(exp_file)

# for simplicity we'll assume that at this stage all subjects use the same armature and therefore report the same keypoints
first_subject = exp_data["iterationData"]["subject Data"][0]
first_entry_key = list(first_subject.keys())[0]
labels = list(first_subject[first_entry_key]["keypoints"].keys())

# show all used labels:
print("\nAll labels:",labels)

print("\nOmitting labels:", omit_labels)

# removing all occurrences of omitted labels from the labels list to be used as keys below
labels = [x for x in labels if x not in omit_labels]

print("\nFinal labels:",labels)


All labels: ['b_t', 'b_a_1', 'b_a_2', 'b_a_3', 'b_a_4', 'b_a_5', 'l_1_co_r', 'l_1_tr_r', 'l_1_fe_r', 'l_1_ti_r', 'l_1_ta_r', 'l_1_pt_r', 'l_2_co_r', 'l_2_tr_r', 'l_2_fe_r', 'l_2_ti_r', 'l_2_ta_r', 'l_2_pt_r', 'l_3_co_r', 'l_3_tr_r', 'l_3_fe_r', 'l_3_ti_r', 'l_3_ta_r', 'l_3_pt_r', 'w_1_r', 'w_2_r', 'l_1_co_l', 'l_1_tr_l', 'l_1_fe_l', 'l_1_ti_l', 'l_1_ta_l', 'l_1_pt_l', 'l_2_co_l', 'l_2_tr_l', 'l_2_fe_l', 'l_2_ti_l', 'l_2_ta_l', 'l_2_pt_l', 'l_3_co_l', 'l_3_tr_l', 'l_3_fe_l', 'l_3_ti_l', 'l_3_ta_l', 'l_3_pt_l', 'w_1_l', 'w_2_l', 'b_h', 'ma_r', 'an_1_r', 'an_2_r', 'an_3_r', 'ma_l', 'an_1_l', 'an_2_l', 'an_3_l']

Omitting labels: ['w_1_l', 'w_1_l_end', 'w_2_l', 'w_2_l_end', 'w_1_r', 'w_1_r_end', 'w_2_r', 'w_2_r_end', 'root']

Final labels: ['b_t', 'b_a_1', 'b_a_2', 'b_a_3', 'b_a_4', 'b_a_5', 'l_1_co_r', 'l_1_tr_r', 'l_1_fe_r', 'l_1_ti_r', 'l_1_ta_r', 'l_1_pt_r', 'l_2_co_r', 'l_2_tr_r', 'l_2_fe_r', 'l_2_ti_r', 'l_2_ta_r', 'l_2_pt_r', 'l_3_co_r', 'l_3_tr_r', 'l_3_fe_r', 'l_3_ti_r', 'l_3_ta_

Now that we have loaded data and colony info we can start plotting bounding boxes on top of their respective images

In [5]:
# transform between sRGB and linear colour space (optional)

def to_linear(srgb):
    """Convert 8-bit sRGB values to linear intensities on a 0-255 scale.

    The input array is converted to float32 and normalised by 255 before the
    piecewise sRGB decoding is applied; the input itself is not modified.
    Returns a float32 array of the same shape.
    """
    scaled = np.float32(srgb) / 255.0
    # values at or below the threshold follow the linear segment of the curve
    low = scaled <= 0.04045
    high = ~low
    scaled[low] /= 12.92
    scaled[high] = np.power((scaled[high] + 0.055) / 1.055, 2.4)
    return scaled * 255.0

    
def from_linear(linear):
    """Encode linear intensities as sRGB values scaled by 255.

    Works on a copy of the input array, so the argument is left unchanged.
    NOTE(review): the 0.0031308 threshold suggests the input is expected in
    the range 0-1, whereas to_linear() returns values on a 0-255 scale -
    confirm before chaining the two functions.
    """
    dark = linear <= 0.0031308
    bright = np.logical_not(dark)
    encoded = linear.copy()
    # linear segment for very dark values, power curve for everything else
    encoded[dark] = 12.92 * linear[dark]
    encoded[bright] = np.power(linear[bright], 1.0 / 2.4) * 1.055 - 0.055
    return 255.0 * encoded

In [6]:
# Pre-allocate the containers that are filled during export and later written
# to the .csv and .h5 files read by DeepLabCut.
# Every (sample, individual) pair gets one row, every label an x and a y column.
num_samples = len(dataset_data)
colony_size = len(colony['Subject Variations'])
num_rows = num_samples * colony_size

all_points = np.zeros((num_rows, len(labels)*2))

print("Number of loaded samples:",num_samples)
print("Colony size:",colony_size)
print("body parts:",int(len(labels)),"\n")
print("Resulting in an array of shape:",all_points.shape)

# relative image path of every row, filled in by the export workers
output_file_names = [""] * num_rows

Number of loaded samples: 10000
Colony size: 20
body parts: 51 

Resulting in an array of shape: (200000, 102)


In [7]:
# create unique colours for each ID
import numpy as np
import time

# alright. Let's take it from the top and fucking multi-thread this.
import threading
import queue
import sys
import os

def fix_bounding_boxes(coords, max_val=(1024, 1024)):
    """Clamp bounding box coordinates to the image and convert them to int.

    Args:
        coords: sequence of coordinates in which positions 0 and 2 are x values
            and every other position is a y value, e.g. [xmin, ymin, xmax, ymax].
        max_val: upper limits; element 0 is applied to the x values and
            element 1 to the y values. Further elements are ignored.
            NOTE(review): numpy image shapes are ordered (rows, columns, ...),
            so passing ``img.shape`` directly clamps x to the image height -
            this only matters for non-square images.

    Returns:
        A new list with every coordinate limited to [0, limit] and truncated
        towards zero by ``int``.
    """
    fixed_coords = []
    for position, coord in enumerate(coords):
        upper = max_val[0] if position in (0, 2) else max_val[1]

        if coord >= upper:
            coord = upper
        elif coord <= 0:
            coord = 0

        fixed_coords.append(int(coord))

    return fixed_coords

def getThreads():
    """Return the number of logical CPUs, or 1 when it cannot be determined.

    ``os.cpu_count()`` works on every platform, whereas parsing
    ``/proc/cpuinfo`` fails on systems without that file and yields 0 on
    CPUs whose entries contain no "cores" line.
    """
    cpu_count = os.cpu_count()
    # os.cpu_count() may return None; always report at least one usable thread
    return cpu_count if cpu_count else 1

class exportThread(threading.Thread):
    """Worker thread that runs process_detections() on a shared work queue."""

    def __init__(self, threadID, name, q):
        super().__init__()
        # numeric ID, thread name, and the queue handed to process_detections()
        self.threadID, self.name, self.q = threadID, name, q

    def run(self):
        """Announce start, process the queue until told to exit, announce exit."""
        print(f"Starting {self.name}")
        process_detections(self.name, self.q)
        print(f"Exiting {self.name}")
        
def createThreadList(num_threads):
    """Return the names "Thread_0" ... "Thread_<num_threads - 1>" as a list."""
    return ["Thread_" + str(index) for index in range(num_threads)]

def process_detections(threadName, q):
    """Worker loop: crop every sufficiently visible individual out of each queued sample.

    Runs until the module-level ``exitFlag_export`` becomes truthy. Each queue
    item is ``[i, data_loc, img, ID]``: the sample index, the path of the JSON
    annotation file, the image path, and the path of the ID (segmentation) pass.

    For every individual of a sample the function
      * writes the relative image path into ``output_file_names``,
      * writes the keypoint coordinates, relative to the (optionally resized)
        bounding box crop, into ``all_points`` - missing, out-of-image, and
        occluded keypoints stay 0,
      * saves the crop to ``target_dir``,
      * or appends the row index to ``incorrectly_formatted_images`` when the
        individual is not visible enough, too small, or too elongated.

    Args:
        threadName: name of the calling thread (not used inside the loop).
        q: the shared work queue; access is guarded by ``queueLock``.
    """
    colony_size = len(colony['Subject Variations'])
    dataset_folder = str(os.path.basename(target_dir))

    while not exitFlag_export:
        # empty-check and get must happen under the same lock, otherwise another
        # worker could take the last item in between
        with queueLock:
            data_input = None if q.empty() else q.get()

        if data_input is None:
            # nothing to do yet: yield instead of spinning on the lock
            time.sleep(0.01)
            continue

        i, data_loc, img, ID = data_input
        first_row = i * colony_size
        sample_rows = range(first_row, first_row + colony_size)

        display_img = cv2.imread(img)
        # the ID pass encodes each individual in one colour channel
        seg_img = cv2.imread(ID)

        # cv2.imread returns None for unreadable files
        if display_img is None or seg_img is None:
            print("Could not read image or segmentation pass for sample", data_loc.split("/")[-1], "!")
            incorrectly_formatted_images.extend(sample_rows)
            continue

        img_shape = display_img.shape

        # check if the size of the image and segmentation pass match;
        # if not, all rows belonging to this sample are marked for removal
        if img_shape != seg_img.shape:
            print("Size mismatch of image and segmentation pass for sample",data_loc.split("/")[-1],"!")
            incorrectly_formatted_images.extend(sample_rows)
            continue

        # numpy image shapes are ordered (rows, columns, channels)
        img_h, img_w = img_shape[0], img_shape[1]

        with open(data_loc) as data_file:
            data = json.load(data_file)

        # debug markers are drawn on a copy so that they never end up in the exported crops
        debug_img = display_img.copy() if DEBUG else None

        # smallest accepted crop area; without a resize resolution no size filter is applied
        # NOTE(review): confirm that "no size filter" is the desired behaviour for resize_resolution = None
        min_area = (resize_resolution ** 2) / 20 if resize_resolution is not None else 0

        for im, individual in enumerate(data["iterationData"]["subject Data"]):
            ind_key = list(individual.keys())[0]
            # WARNING ID numbering begins at 1
            ind_ID = int(ind_key)
            subject = individual[ind_key]
            out_row = first_row + im

            img_name = img.split('/')[-1][:-4] + "_id_" + str(im) + "_synth" + ".png"
            # remember the relative file path belonging to this row of all_points
            output_file_names[out_row] = "labeled-data/" + dataset_folder + "/" + img_name

            bounds = subject["2DBounds"]
            # x limits are bounded by the image width, y limits by the image height
            bbox = fix_bounding_boxes([bounds["xmin"], bounds["ymin"], bounds["xmax"], bounds["ymax"]],
                                      max_val=(img_w, img_h))
            box_w = bbox[2] - bbox[0]
            box_h = bbox[3] - bbox[1]

            # only process an individual if its bounding box has a positive width and height
            if box_w <= 0 or box_h <= 0:
                continue

            # count the pixels inside the bounding box that belong to this individual
            try:
                ID_mask = cv2.inRange(seg_img[bbox[1]:bbox[3],bbox[0]:bbox[2]],
                                      np.array([0, 0, ind_ID - 2]),
                                      np.array([0, 0, ind_ID + 2]))
                indivual_occupancy = cv2.countNonZero(ID_mask)
            except cv2.error:
                if len(threadList_export) == 1:
                    print("Individual fully occluded:",ind_ID,"in",ID)
                indivual_occupancy = 1

            bbox_area = abs(box_w * box_h) + 1
            bbox_occupancy = indivual_occupancy / bbox_area
            bbox_ratio = abs(box_w / box_h)

            # check that enough of the individual is visible to warrant producing a sample from it
            # - high enough visibility
            # - large enough image patch
            # - no extreme aspect ratios
            usable = (bbox_occupancy > visibility_threshold
                      and bbox_area > min_area
                      and 0.33 <= bbox_ratio <= 3)
            if not usable:
                incorrectly_formatted_images.append(out_row)
                continue

            # binarise the ID pass for this individual and dilate it, so that keypoints
            # lying just outside the silhouette still count as visible
            seg_bin = cv2.inRange(seg_img, np.array([0,0, ind_ID - 1]), np.array([0,0, ind_ID + 1]))
            kernel = np.ones((5,5), np.uint8)
            seg_bin_dilated = cv2.dilate(seg_bin,kernel,iterations = 2)

            for point, label in enumerate(labels):
                position = subject["keypoints"][label]["2DPos"]
                x, y = position["x"], position["y"]

                # points outside the image, or closer than 0.1 px to its top/left border,
                # are treated as missing and keep their initial value of 0
                if x < 0.1 or y < 0.1 or x >= img_w or y >= img_h:
                    continue

                # exclude occluded keypoints by checking their visibility in the segmentation map
                if EXCLUDE_OCCLUDED_KEYPOINTS:
                    x_temp = int(x)
                    y_temp = int(y)
                    if seg_bin_dilated[y_temp,x_temp] == 0:
                        if DEBUG:
                            debug_img = cv2.circle(debug_img, (x_temp,y_temp), radius=0, color=(0, 0, 255), thickness=2)
                            cv2.imshow("missing points",debug_img)
                            cv2.waitKey(0)
                        continue

                # coordinates are stored relative to the crop, scaled if the crop is resized
                if resize_resolution is not None:
                    out_x = round((x - bbox[0]) * (resize_resolution / box_w), 2)
                    out_y = round((y - bbox[1]) * (resize_resolution / box_h), 2)
                else:
                    out_x = round(x - bbox[0], 2)
                    out_y = round(y - bbox[1], 2)

                out_column = point * 2
                all_points[out_row][out_column] = out_x
                all_points[out_row][out_column + 1] = out_y

            crop = display_img[bbox[1]:bbox[3],bbox[0]:bbox[2]]
            if resize_resolution is not None:
                crop = cv2.resize(crop,
                                  (resize_resolution, resize_resolution),
                                  interpolation = cv2.INTER_CUBIC)

            # cv2.imwrite reports failure through its return value instead of raising
            if not cv2.imwrite(target_dir + "/" + img_name, crop):
                print("WARNING: could not write", target_dir + "/" + img_name)
            
# shared flag polled by the workers; set to 1 once all samples have been handed out
exitFlag_export = 0
# one worker per logical CPU (at least one); in DEBUG mode the workers open OpenCV windows
# that wait for a key press, so only a single worker is used then
threadList_export = createThreadList(1 if DEBUG else max(1, getThreads()))
print("Using", len(threadList_export), "threads for export...")
queueLock = threading.Lock()

# define paths to all images and set the maximum number of items in the queue equivalent to the number of images
workQueue_export = queue.Queue(len(dataset_img))
threads = []
threadID = 1

# keep track of all incorrectly formatted images to remove them after iterating over all entries
incorrectly_formatted_images = []

np.random.seed(seed=1)
ID_colours = np.random.randint(255, size=(255, 3))

font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
lineType = 2


# we can additionally plot the points in the data files to check joint locations
plot_joints = True

# remember to define an export folder when saving out your dataset
generate_dataset = True

# resize each sub window to a fixed resolution (set to None, if not desired)
resize_resolution = 300

# determine the proportion of a bounding box that needs to be filled before considering the visibility as too low
# WARNING: At the moment the ID shown in segmentation maps does not always correspond to the ID in the data file (off by 1)
visibility_threshold = 0.05

# cv2.imwrite does not create missing folders, it only reports failure via its return value
os.makedirs(target_dir, exist_ok=True)

timer = time.time()

# Create new threads
for tName in threadList_export:
    thread = exportThread(threadID, tName, workQueue_export)
    thread.start()
    threads.append(thread)
    threadID += 1

# Fill the queue with samples; the workers cannot take items while the lock is held
with queueLock:
    for i, (data, img, ID) in enumerate(zip(dataset_data , dataset_img, dataset_ID)):
        workQueue_export.put([i, data, img, ID])

# Wait for queue to empty. Sleeping leaves the CPU to the workers, and checking that at least
# one worker is still alive prevents waiting forever if all of them stopped with an error
while not workQueue_export.empty() and any(t.is_alive() for t in threads):
    time.sleep(0.1)

if not workQueue_export.empty():
    print("WARNING: all export threads stopped with", workQueue_export.qsize(), "samples left unprocessed!")

# Notify threads it's time to exit
exitFlag_export = 1

# Wait for all threads to complete
for t in threads:
    t.join()
print("Exiting Main export Thread")

# close all windows if they were opened
cv2.destroyAllWindows()

# now, remove all incorrectly formatted images from the points and file list
# duplicates are dropped first so that both containers lose exactly the same rows
incorrectly_formatted_images = sorted(set(incorrectly_formatted_images))
print(incorrectly_formatted_images)

all_points = np.delete(all_points, incorrectly_formatted_images ,axis=0)
# delete from the back so that the remaining indices stay valid
for rem_img in reversed(incorrectly_formatted_images):
    del output_file_names[rem_img]

print("Total time elapsed:",time.time()-timer,"seconds")

Using 28 threads for export...
Starting Thread_0
Starting Thread_1
Starting Thread_2
Starting Thread_3
Starting Thread_4
Starting Thread_5
Starting Thread_6
Starting Thread_7
Starting Thread_8
Starting Thread_9
Starting Thread_10
Starting Thread_11
Starting Thread_12
Starting Thread_13
Starting Thread_14
Starting Thread_15
Starting Thread_16
Starting Thread_17
Starting Thread_18
Starting Thread_19
Starting Thread_20
Starting Thread_21
Starting Thread_22
Starting Thread_23
Starting Thread_24
Starting Thread_25
Starting Thread_26
Starting Thread_27
Exiting Thread_23
Exiting Thread_2Exiting Thread_14

Exiting Thread_16
Exiting Thread_13
Exiting Thread_9Exiting Thread_15

Exiting Thread_5
Exiting Thread_17
Exiting Thread_27Exiting Thread_20Exiting Thread_12Exiting Thread_26



Exiting Thread_6Exiting Thread_0

Exiting Thread_18
Exiting Thread_19
Exiting Thread_24
Exiting Thread_25Exiting Thread_8
Exiting Thread_11

Exiting Thread_22
Exiting Thread_7Exiting Thread_3

Exiting Thread_10
Exiti

Total time elapsed: 262.02771854400635 seconds


Now, dump it all into one **DLC-conform pandas (.h5)** file

In [8]:
#example_DLC_df = pd.read_hdf("I:/FARTS/DeepLabCut-Multi-Animal/multi_ant_test_label-Fabi-2021-07-23/labeled-data/multi_animal_1080p/CollectedData_Fabi.h5")
#print(example_DLC_df.columns.get_level_values(2))
#example_DLC_df

In [9]:
# Build one list per level of the column hierarchy.
n_columns = len(all_points[0])
colony_size = len(colony['Subject Variations'])

# the scorer name is repeated for every column
scorer = [SCORER] * n_columns

### UPDATE ONCE ALL COLONY INFO IS INCLUDED ###
# one "id_<n>_num_<n>" entry per coordinate column of every individual
individuals = [
    "id_" + str(ind) + "_num_" + str(ind)
    for ind in range(colony_size)
    for _ in range(int((len(labels))*2))
]

# every label appears twice in a row (x and y), repeated once per individual
bodyparts_filtered = [label for label in labels for _ in range(2)]
bodyparts = bodyparts_filtered * colony_size

# alternating "x", "y" for every pair of columns
coords = ["x", "y"] * (n_columns // 2)

Now that all elements of the **Multi-Index** hierarchy are defined, we can combine them into the **final dataframe**

In [10]:
# combine the three levels into column tuples; zip() stops at the shortest level
categories = [scorer, bodyparts, coords]
categories_tuples = list(zip(*categories))
columns = pd.MultiIndex.from_tuples(
    categories_tuples,
    names=["scorer", "bodyparts", "coords"],
)

final_dataframe = pd.DataFrame(data=all_points, index=output_file_names, columns=columns)

# negative values and values beyond the resized crop are first set to 0 ...
negative_entries = final_dataframe < 0
final_dataframe[negative_entries] = 0
if resize_resolution is not None:
    oversized_entries = final_dataframe > resize_resolution
    final_dataframe[oversized_entries] = 0

# ... then every 0 becomes NaN and rows with fewer than two remaining values are dropped
final_dataframe = final_dataframe.replace(0, np.nan).dropna(thresh=2)

In [11]:
final_dataframe

scorer,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi,Fabi
bodyparts,b_t,b_t,b_a_1,b_a_1,b_a_2,b_a_2,b_a_3,b_a_3,b_a_4,b_a_4,...,an_3_r,an_3_r,ma_l,ma_l,an_1_l,an_1_l,an_2_l,an_2_l,an_3_l,an_3_l
coords,x,y,x,y,x,y,x,y,x,y,...,x,y,x,y,x,y,x,y,x,y
labeled-data/synth/multi-ant-pose1_00000_id_0_synth.png,277.47,109.56,205.66,172.97,185.42,189.39,162.48,207.75,137.85,223.73,...,266.33,134.62,261.03,69.79,276.18,55.10,,,,
labeled-data/synth/multi-ant-pose1_00000_id_2_synth.png,176.31,269.80,184.39,186.68,195.48,162.69,197.08,140.50,195.74,119.60,...,,,133.12,274.43,148.05,280.17,184.66,260.18,125.09,232.91
labeled-data/synth/multi-ant-pose1_00000_id_15_synth.png,232.72,201.45,178.78,158.44,158.86,141.42,145.24,130.78,133.30,122.42,...,,,256.87,235.10,271.19,228.42,,198.23,,
labeled-data/synth/multi-ant-pose1_00001_id_0_synth.png,35.15,146.27,111.55,93.02,132.45,80.05,156.49,64.53,180.68,53.90,...,16.24,197.86,40.68,198.61,28.39,201.76,26.57,247.54,82.38,
labeled-data/synth/multi-ant-pose1_00001_id_1_synth.png,133.40,177.86,135.15,103.04,133.79,76.59,133.43,59.51,131.03,37.76,...,55.95,,134.12,205.34,145.17,227.68,243.56,213.43,296.69,262.42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
labeled-data/synth/multi-ant-pose1_09999_id_5_synth.png,242.13,157.72,124.59,171.15,108.24,177.05,89.33,180.47,38.69,175.55,...,247.18,166.00,255.20,138.28,272.26,131.46,290.97,90.45,,72.70
labeled-data/synth/multi-ant-pose1_09999_id_6_synth.png,259.51,101.50,198.83,131.88,189.01,139.53,172.21,148.05,154.30,157.33,...,261.44,227.15,234.69,97.38,242.55,75.27,,,,
labeled-data/synth/multi-ant-pose1_09999_id_13_synth.png,144.50,192.22,220.88,179.11,252.66,173.80,270.99,170.39,285.22,166.65,...,,,73.66,200.88,72.23,228.50,,,,
labeled-data/synth/multi-ant-pose1_09999_id_16_synth.png,84.92,136.55,152.67,98.02,170.05,89.58,190.93,79.08,208.54,74.68,...,0.94,167.42,61.00,194.36,57.24,198.28,69.63,259.88,111.29,


In [12]:
# write the annotations as .csv and as .h5 into the target folder
# NOTE(review): the file names use the fixed tag "FARTS" while the "scorer" column level
# uses SCORER - confirm that this matches the scorer expected by the DeepLabCut project
final_dataframe.to_csv(os.path.join(target_dir, "CollectedData_FARTS.csv"))

# IF the function below fails, this is likely due to exceeding the number of columns supported by HDF5 files!
# Restrict the number of simulated animals to < 20 if the goal is to train a DLC network

# "key" is passed by keyword: passing it positionally is deprecated in recent pandas versions
final_dataframe.to_hdf(
    os.path.join(target_dir, "CollectedData_" + "FARTS" + ".h5"),
    key="df_with_missing",
    format="table",
    mode="w")
    