# This script is used to automatically generate datasets formatted to be interpretable by DeepLabCut.

**WARNING:** Restrict the **colony size** to a **maximum of 20 individuals**; otherwise the generation of datasets will fail, as the number of necessary columns would exceed the 64 kB metadata limit of the exported HDF5 file!

Ensure that your **GPU** has sufficient **memory** for the chosen resolution!

E.g. at 1024 px X 1024 px, training on an RTX 2080 Ti, set the **batchsize** to 4 in the **pose_cfg.yaml** file of your model.

In [1]:
import cv2
import pathlib

import numpy as np
import pandas as pd

from os import listdir
from os.path import isfile, join

In [2]:
# input folder holding the generated passes, and output folder for the DeepLabCut dataset
dataset_location = "example_data/_input_multi"
target_dir = "example_data/DLC_HERO"

# every regular file (no sub-folders) found directly inside the input folder
all_files = [f for f in listdir(dataset_location) if isfile(join(dataset_location, f))]

# one list per pass type: images, depth maps, segmentation maps, and data files
dataset_img, dataset_depth, dataset_seg, dataset_data = [], [], [], []
dataset_colony = dataset_location + "/ColonieInfo.csv"

# the file name without its last four characters (".ext") must end in one of these
passes_by_suffix = {
    "Img": dataset_img,
    "Seg": dataset_seg,
    "Depth": dataset_depth,
    "Data": dataset_data,
}

for file in all_files:
    loc = f"{dataset_location}/{file}"
    stem = file[:-4]
    for suffix, bucket in passes_by_suffix.items():
        if stem.endswith(suffix):
            bucket.append(loc)
            break

print("Found",len(all_files),"files...")

# next sort the colony info into its IDs to determine the colony size and individual scales
# one entry for each successive ID is read
from csv import reader

# 'seed' is kept as the raw text found after "=" in the first row,
# the three lists hold one entry per individual (one per following row)
colony = {'seed': 0,
            'ID': [],
         'scale': [],
        'weight': []}

# newline='' is the mode recommended by the csv module for files passed to reader()
with open(dataset_colony, 'r', newline='') as colony_file:
        # report the file that is actually opened here
        print("reading", dataset_colony)
        # pass the file object to reader() to get the reader object
        csv_reader = reader(colony_file)
        # iterate over each row in the csv using reader object
        for r, row in enumerate(csv_reader):
            if not row:
                # blank lines carry no information and would fail on row[0]
                continue
            if r == 0:
                # first row: "<key>=<seed>"
                colony['seed'] = row[0].split("=")[-1]
            else:
                colony['ID'].append(row[0].split("=")[-1])
                # second column is "_"-separated; parts 1 and 2 form the class name
                weight_parts = row[1].split("_")
                colony['weight'].append(weight_parts[1] + "-" + weight_parts[2])
                colony['scale'].append(float(row[2].split("=")[-1]))

print("Loaded colony file with seed",colony['seed'],"and",len(colony['ID']),"individuals.")

# get provided classes to create a dictionary of class IDs and class names
# (np.unique returns the distinct names in sorted order, so IDs follow that order)
subject_class_names = np.unique(np.array(colony["weight"]))
# a comprehension keeps the loop variables local, so the builtin id() is not shadowed
subject_classes = {str(class_name): class_id
                   for class_id, class_name in enumerate(subject_class_names)}

# exactly one individual -> single-animal dataset, anything else -> multi-animal
multi_animal = len(colony['ID']) != 1
if multi_animal:
    print("Generating MULTI-animal dataset!")
else:
    print("Generating SINGLE-animal dataset!")

print("\nA total of",len(subject_class_names),"unique classes have been found.")
print("The classes and respective class IDs are:\n",subject_classes,"\n")

Found 41 files...
reading ColonieInfo.csv
Loaded colony file with seed  123 and 10 individuals.
Generating MULTI-animal dataset!

A total of 5 unique classes have been found.
The classes and respective class IDs are:
 {'00011-atta': 0, '00501-atta': 1, 'Pogonomyrmex-desertorum1176': 2, 'Pogonomyrmex-desertorum1177': 3, 'Sungaya-inexpectata': 4} 



Now that we have the cleaned colony info, we can start loading the data associated with each frame.
For simplicity, we store this as a nested list: one list per frame, which in turn contains one list per individual.

We will therefore access "data" as [frame] [individual] [attribute], where attributes will include [ID,bbox_x_0,bbox_y_0,...]

To train a multi-animal DeepLabCut network, we mostly care about joint positions and less about bounding boxes.

In [3]:
# data[frame][individual] -> [ID, value, value, ...] with every numeric 2D entry of that row
data = []

for file in dataset_data:
    # all individuals found in this frame's data file
    coords = []

    with open(file, 'r') as read_obj:
        print("reading", file)
        for row in reader(read_obj):
            # text in front of the first "." of the first column
            leading_token = row[0].split(".")[0]
            # the camera projection row does not describe an individual
            if leading_token == "camera_projection:":
                continue

            # the individual's ID comes first, followed by its numeric attributes
            individual = [float(leading_token)]
            for elem in row:
                pieces = elem.split("=")
                # entries whose name ends in "world" hold 3D keypoint data and are left out
                if pieces[0].endswith("world"):
                    continue
                try:
                    individual.append(float(pieces[-1]))
                except ValueError:
                    # non-numeric entries are ignored
                    continue
            coords.append(individual)

    data.append(coords)

print("\nThe dataset has a total of", len(data),"generated frames.")

reading example_data/_input_multi/10_Data.csv
reading example_data/_input_multi/1_Data.csv
reading example_data/_input_multi/2_Data.csv
reading example_data/_input_multi/3_Data.csv
reading example_data/_input_multi/4_Data.csv
reading example_data/_input_multi/5_Data.csv
reading example_data/_input_multi/6_Data.csv
reading example_data/_input_multi/7_Data.csv
reading example_data/_input_multi/8_Data.csv
reading example_data/_input_multi/9_Data.csv

The dataset has a total of 10 generated frames.


As there may be animals for which we don't use all bones we can return a list of all labels and exclude the respective locations from the pose data. As all animals use the same convention, we can simply read in one example and remove the corresponding indices from all animals.

In [4]:
# read the attribute labels from the first line of the first data file that starts with an
# individual (files whose first line starts with "cam" or that are empty are skipped)
labels = []
entries_found = False
entry = 0  # index of the data file the labels are taken from

for entry, data_file in enumerate(dataset_data):
    with open(data_file, 'r') as read_obj:
        print("reading", read_obj.name)
        # first line of the file, or None if the file is empty
        row_0 = next(reader(read_obj), None)

    if row_0 and row_0[0][:3] != "cam":
        entries_found = True
        for elem in row_0:
            label_key = elem.split("=")[0]
            # exclude 3D keypoint data, whose names end in "world"
            if label_key[-5:] != "world":
                # keep only the part after "Bone." (or the whole key if it is absent)
                labels.append(label_key.split("Bone.")[-1])
        break

    print("No entries found! Reading next file... \n")

if not entries_found:
    # fail with a clear message instead of running past the end of dataset_data
    raise ValueError("None of the data files contains an individual to read the labels from.")

# now let's define which labels NOT to use (in our case, all labels relating to wings)
# ... so that just means "omit all labels that start with 'w'"

# indices (into labels) of the excluded labels; startswith() also copes with empty labels
matched_labels = [l for l, label in enumerate(labels) if label.startswith("w")]
# the excluded labels themselves, in the same order
omit_labels = [labels[l] for l in matched_labels]

print("\nCorresponding to the following indices:",matched_labels)

# show all used labels:
print("\nAll labels:")
print(labels)

reading example_data/_input_multi/10_Data.csv

Corresponding to the following indices: [60, 61, 62, 63, 64, 65, 66, 67, 110, 111, 112, 113, 114, 115, 116, 117]

All labels:
['0.BoundingBox.BoundMin.X', 'BoundingBox.BoundMin.Y', 'BoundingBox.BoundMax.X', 'BoundingBox.BoundMax.Y', 'b_t.X', 'b_t.Y', 'b_a_1.X', 'b_a_1.Y', 'b_a_2.X', 'b_a_2.Y', 'b_a_3.X', 'b_a_3.Y', 'b_a_4.X', 'b_a_4.Y', 'b_a_5.X', 'b_a_5.Y', 'b_a_5_end.X', 'b_a_5_end.Y', 'l_1_co_r.X', 'l_1_co_r.Y', 'l_1_tr_r.X', 'l_1_tr_r.Y', 'l_1_fe_r.X', 'l_1_fe_r.Y', 'l_1_ti_r.X', 'l_1_ti_r.Y', 'l_1_ta_r.X', 'l_1_ta_r.Y', 'l_1_pt_r.X', 'l_1_pt_r.Y', 'l_1_pt_r_end.X', 'l_1_pt_r_end.Y', 'l_2_co_r.X', 'l_2_co_r.Y', 'l_2_tr_r.X', 'l_2_tr_r.Y', 'l_2_fe_r.X', 'l_2_fe_r.Y', 'l_2_ti_r.X', 'l_2_ti_r.Y', 'l_2_ta_r.X', 'l_2_ta_r.Y', 'l_2_pt_r.X', 'l_2_pt_r.Y', 'l_2_pt_r_end.X', 'l_2_pt_r_end.Y', 'l_3_co_r.X', 'l_3_co_r.Y', 'l_3_tr_r.X', 'l_3_tr_r.Y', 'l_3_fe_r.X', 'l_3_fe_r.Y', 'l_3_ti_r.X', 'l_3_ti_r.Y', 'l_3_ta_r.X', 'l_3_ta_r.Y', 'l_3_pt_r.X', 

Now that we have loaded data and colony info we can start plotting bounding boxes on top of their respective images

In [5]:
# transform between sRGB and linear colour space (optional)

def to_linear(srgb):
    """Apply the sRGB decoding curve to 8-bit-scaled values.

    The input is converted to float32 and divided by 255. Values up to
    0.04045 are divided by 12.92, all others go through the
    ((v + 0.055) / 1.055) ** 2.4 power curve. The result is multiplied by
    255 again before it is returned.

    Args:
        srgb: array-like of sRGB values (presumably in 0..255 - TODO confirm).

    Returns:
        New array with the decoded values, scaled by 255.
    """
    scaled = np.float32(srgb) / 255.0
    in_linear_segment = scaled <= 0.04045
    in_power_segment = np.logical_not(in_linear_segment)
    scaled[in_linear_segment] /= 12.92
    scaled[in_power_segment] = np.power((scaled[in_power_segment] + 0.055) / 1.055, 2.4)
    return scaled * 255.0

    
def from_linear(linear):
    """Apply the sRGB encoding curve to linear values and scale the result by 255.

    Values up to 0.0031308 are multiplied by 12.92, all others are mapped
    through 1.055 * v ** (1 / 2.4) - 0.055. The input is used as-is (it is not
    divided by 255), so it is presumably expected in 0..1 - TODO confirm.

    Args:
        linear: numpy array of linear values; it is not modified.

    Returns:
        New array with the encoded values multiplied by 255.
    """
    encoded = linear.copy()
    in_linear_segment = linear <= 0.0031308
    in_power_segment = ~in_linear_segment
    encoded[in_linear_segment] = 12.92 * linear[in_linear_segment]
    encoded[in_power_segment] = np.power(linear[in_power_segment], 1.0 / 2.4) * 1.055 - 0.055
    return encoded * 255.0

In [6]:
# Pre-allocate one big array for the whole dataset so it can later be written
# to the .csv and .h5 files that DeepLabCut reads.

# one row per (frame, individual) pair
num_rows = len(data) * len(colony['ID'])
# one column per kept coordinate: all labels minus the omitted ones and the 4 bounding box entries
num_columns = len(labels) - len(matched_labels) - 4
all_points = np.zeros((num_rows, num_columns))
#	- scorer   #(just one, the only scorer is the generator)
#	- - individuals
#	- - - bodyparts
#	- - - - coords

print("Number of loaded samples:",len(data))
print("Colony size:",len(colony['ID']))
print("body parts:",int(num_columns/2)," (including X & Y coordinates)\n")
print("Resulting in an array of shape:",all_points.shape)

# output image path for every row of all_points, filled in during export
output_file_names = [""] * num_rows

Number of loaded samples: 10
Colony size: 10
body parts: 62  (including X & Y coordinates)

Resulting in an array of shape: (100, 124)


In [7]:
# create unique colours for each ID
import numpy as np
import time

# Process the frames in parallel with a pool of worker threads.
import threading
import queue
import sys
import os

def fix_bounding_boxes(coords, max_val=(1024, 1024)):
    """Clamp bounding box coordinates so they do not reach beyond the image.

    Args:
        coords: sequence of numbers. Entries 0 and 2 are limited by
            ``max_val[0]``, every other entry by ``max_val[1]``. The caller in
            this file passes (min X, min Y, max X, max Y).
        max_val: upper limits as (limit for entries 0 and 2, limit for the
            other entries). Only the first two items are read. An immutable
            tuple is used as default so the default can never be altered
            between calls.

    Returns:
        List of ints, each limited to the range 0..limit.
    """
    fixed_coords = []
    for c, coord in enumerate(coords):
        upper = max_val[0] if c in (0, 2) else max_val[1]
        # clamp to [0, upper] first, then truncate to an integer pixel position
        fixed_coords.append(int(min(max(coord, 0), upper)))

    return fixed_coords

def getThreads():
    """ Returns the number of logical CPUs reported by the OS (at least 1).

    os.cpu_count() works on every platform and does not depend on the layout
    of /proc/cpuinfo; it may return None when the count cannot be determined,
    in which case a single thread is used.
    """
    return os.cpu_count() or 1

class exportThread(threading.Thread):
    """Worker thread that runs process_detections() on a shared work queue."""

    def __init__(self, threadID, name, q):
        """Store the thread's numeric ID, its name, and the queue it reads from."""
        super().__init__()
        self.threadID, self.q = threadID, q
        self.name = name

    def run(self):
        """Announce start, process queue items until told to stop, announce exit."""
        label = self.name
        print("Starting " + label)
        process_detections(label, self.q)
        print("Exiting " + label)
        
def createThreadList(num_threads):
    """Return the names "Thread_0" ... "Thread_<num_threads - 1>" as a list."""
    return [f"Thread_{index}" for index in range(num_threads)]

def process_detections(threadName, q):
    """Worker loop: crop every sufficiently visible individual out of the queued frames.

    Runs until the module-level ``exitFlag_export`` becomes truthy. Each work
    item taken from ``q`` is ``[i, img]``: the frame index and the path of the
    frame's image. For every individual listed in ``data[i]`` the function

    * stores the output image path in ``output_file_names``,
    * measures how much of the individual's bounding box is covered by its
      colour in the segmentation pass ``dataset_seg[i]``,
    * if that share exceeds ``visibility_threshold``: writes the keypoints
      (relative to the bounding box, optionally rescaled to
      ``resize_resolution``) into ``all_points`` and saves the cropped image,
    * otherwise records the row index in ``incorrectly_formatted_images``.

    Frames whose image or segmentation pass cannot be read, or whose sizes
    differ, have all their rows recorded in ``incorrectly_formatted_images``.

    Args:
        threadName: name of the calling thread (not used inside the loop).
        q: queue.Queue holding the ``[i, img]`` work items.
    """
    colony_size = len(colony['ID'])
    # label indices to leave out; a set makes the per-keypoint lookup O(1)
    omitted_label_ids = frozenset(matched_labels)
    dilation_kernel = np.ones((5, 5), np.uint8)

    while not exitFlag_export:
        # take one work item while holding the lock; the lock is released even on errors
        with queueLock:
            data_input = None if q.empty() else q.get()
        if data_input is None:
            continue

        i = data_input[0]
        img = data_input[1]
        first_row = i * colony_size

        # cv2.imread returns None instead of raising when a file cannot be read
        display_img = cv2.imread(img)
        seg_img = cv2.imread(dataset_seg[i])

        if display_img is None or seg_img is None:
            print("Could not read image or segmentation pass for sample",img.split("/")[-1],"!")
            incorrectly_formatted_images.extend(range(first_row, first_row + colony_size))
            continue

        # image arrays are indexed [row (Y), column (X), channel]
        img_shape = display_img.shape

        # check if the size of the image and segmentation pass match
        if img_shape != seg_img.shape:
            print("Size mismatch of image and segmentation pass for sample",data_input[1].split("/")[-1],"!")
            incorrectly_formatted_images.extend(range(first_row, first_row + colony_size))
            continue

        for im, individual in enumerate(data[i]):
            out_row = first_row + im
            img_name = target_dir + "/" + img.split('/')[-1][:-4] + "_id" + str(im) + "_synth" + ".png"
            # remember the file path belonging to this row of all_points
            output_file_names[out_row] = img_name

            # X coordinates are limited by the image width (shape[1]),
            # Y coordinates by the image height (shape[0])
            bbox = fix_bounding_boxes(individual[1:5], max_val=(img_shape[1], img_shape[0]))
            crop_w = bbox[2] - bbox[0]
            crop_h = bbox[3] - bbox[1]

            # FOR SOME REASON OCCASIONALLY THE ID OF THE SEG FILE IS LOWER THAN THE DATA FILE
            # with: ID = red_channel/255 * im
            # red_channel = (ID/im) * 255
            ID_red_val = int((individual[0] / colony_size) * 255)

            # count the bounding box pixels that carry this individual's colour (+- 2)
            try:
                ID_mask = cv2.inRange(seg_img[bbox[1]:bbox[3], bbox[0]:bbox[2]],
                                      np.array([0, 0, ID_red_val - 2]),
                                      np.array([0, 0, ID_red_val + 2]))
                individual_occupancy = cv2.countNonZero(ID_mask)
            except Exception:
                # best effort: a crop that cannot be evaluated counts as not visible
                individual_occupancy = 0

            bbox_area = abs(crop_w * crop_h) + 1
            bbox_occupancy = individual_occupancy / bbox_area

            # an empty crop can neither be rescaled nor written to disk
            if crop_w <= 0 or crop_h <= 0 or bbox_occupancy <= visibility_threshold:
                incorrectly_formatted_images.append(out_row)
                continue

            seg_bin_dilated = None
            if EXCLUDE_OCCLUDED_KEYPOINTS:
                # binarise the segmentation pass and dilate it so that keypoints
                # on the outline of the individual still count as visible
                seg_bin = cv2.inRange(seg_img,
                                      np.array([0, 0, ID_red_val - 2]),
                                      np.array([0, 245, ID_red_val + 2]))
                seg_bin_dilated = cv2.dilate(seg_bin, dilation_kernel, iterations=1)

            # scale from crop pixels to output pixels (1 when the crop is not resized)
            if resize_resolution is not None:
                factor_X = resize_resolution / crop_w
                factor_Y = resize_resolution / crop_h
            else:
                factor_X = factor_Y = 1

            output_cell = 0
            for point in range(int(len(individual[5:]) / 2)):
                x_idx = point * 2 + 5
                y_idx = x_idx + 1

                # omitted labels do not get a column at all
                if point * 2 + 4 in omitted_label_ids:
                    continue

                # keypoints outside the image keep the array's initial value (0)
                if (individual[x_idx] > img_shape[1] or individual[x_idx] < 0
                        or individual[y_idx] > img_shape[0] or individual[y_idx] < 0):
                    output_cell += 1
                    continue

                out_column = output_cell * 2

                # exclude keypoints that sit (almost) on the image origin
                if individual[x_idx] < 0.1 or individual[y_idx] < 0.1:
                    individual[x_idx] = 0 # X
                    individual[y_idx] = 0 # Y

                # exclude occluded keypoints by checking their visibility in the segmentation map
                if EXCLUDE_OCCLUDED_KEYPOINTS:
                    # a keypoint exactly on the far image border would index one pixel too far
                    seg_row = min(int(individual[y_idx]), img_shape[0] - 1)
                    seg_col = min(int(individual[x_idx]), img_shape[1] - 1)
                    if seg_bin_dilated[seg_row, seg_col] != 255:
                        individual[x_idx] = 0 # X
                        individual[y_idx] = 0 # Y

                # store the keypoint relative to the top-left corner of the crop
                all_points[out_row][out_column] = round((individual[x_idx] - bbox[0]) * factor_X, 2)
                all_points[out_row][out_column + 1] = round((individual[y_idx] - bbox[1]) * factor_Y, 2)

                output_cell += 1

            display_img_crop = display_img[bbox[1]:bbox[3], bbox[0]:bbox[2]]
            if resize_resolution is not None:
                display_img_crop = cv2.resize(display_img_crop,
                                              (resize_resolution, resize_resolution),
                                              interpolation=cv2.INTER_CUBIC)
            cv2.imwrite(img_name, display_img_crop)
            
# setup as many threads as there are (virtual) CPU cores
# (workers keep running until this flag is set to a truthy value)
exitFlag_export = 0
# one worker thread per core reported by getThreads()
threadList_export = createThreadList(getThreads())
print("Using", len(threadList_export), "threads for export...")
queueLock = threading.Lock()

# make sure the export folder exists, cv2.imwrite does not create missing folders
os.makedirs(target_dir, exist_ok=True)

# define paths to all images and set the maximum number of items in the queue equivalent to the number of images
workQueue_export = queue.Queue(len(dataset_img))
threads = []
threadID = 1

# keep track of all incorrectly formatted images to remove them after iterating over all entries
incorrectly_formatted_images = []

# fixed seed, so every run produces the same colour for each ID
np.random.seed(seed=1)
ID_colours = np.random.randint(255, size=(len(colony['ID']), 3))

# text settings for drawing IDs onto images
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 0.5
lineType = 2

# we can optionally remove occluded points from the dataframe
EXCLUDE_OCCLUDED_KEYPOINTS = False

# we can additionally plot the points in the data files to check joint locations
plot_joints = True

# remember to define an export folder when saving out your dataset
generate_dataset = True

# resize each sub window to a fixed resolution (set to None, if not desired)
resize_resolution = 300

# determine the proportion of a bounding box that needs to be filled before considering the visibility as too low
# WARNING: At the moment the ID shown in segmentation maps does not always correspond to the ID in the data file (off by 1)
visibility_threshold = 0.01

timer = time.time()

# Create new threads
for tName in threadList_export:
    thread = exportThread(threadID, tName, workQueue_export)
    thread.start()
    threads.append(thread)
    threadID += 1

# Fill the queue with one [index, image path] item per image
# (the lock keeps the workers from taking items until the queue is complete)
with queueLock:
    for i,img in enumerate(dataset_img):
        workQueue_export.put([i, img])

# Wait for queue to empty. Sleeping between checks leaves the CPU to the workers,
# and the loop ends early if no worker is left that could empty the queue.
while not workQueue_export.empty():
    if not any(worker.is_alive() for worker in threads):
        print("All export threads stopped before the queue was emptied!")
        break
    time.sleep(0.01)

# Notify threads it's time to exit
exitFlag_export = 1

# Wait for all threads to complete
for t in threads:
    t.join()
print("Exiting Main export Thread")

# close all windows if they were opened
cv2.destroyAllWindows()

# now, remove all incorrectly formatted images from the points and file list
incorrectly_formatted_images.sort()
print(incorrectly_formatted_images)
all_points = np.delete(all_points, incorrectly_formatted_images ,axis=0)
# drop the same rows from the file list; filtering by row index (instead of deleting
# with shifted indices) stays in step with np.delete even if an index is listed twice
rows_to_drop = set(incorrectly_formatted_images)
output_file_names[:] = [name for row, name in enumerate(output_file_names)
                        if row not in rows_to_drop]

print("Total time elapsed:",time.time()-timer,"seconds")

Using 12 threads for export...
Starting Thread_0
Starting Thread_1
Starting Thread_2
Starting Thread_3
Starting Thread_4
Starting Thread_5
Starting Thread_6
Starting Thread_7
Starting Thread_8
Starting Thread_9
Starting Thread_10
Starting Thread_11
Size mismatch of image and segmentation pass for sample 3_Img.png !
Exiting Thread_8Exiting Thread_7
Exiting Thread_11

Exiting Thread_5
Size mismatch of image and segmentation pass for sample 9_Img.png !
Exiting Thread_0
Exiting Thread_3
Exiting Thread_4
Exiting Thread_9
Exiting Thread_6
Exiting Thread_10
Exiting Thread_1
Exiting Thread_2
Exiting Main export Thread
[0, 1, 3, 4, 8, 9, 13, 20, 21, 22, 23, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 47, 48, 49, 53, 55, 56, 58, 59, 61, 63, 66, 67, 68, 70, 71, 74, 75, 76, 77, 78, 80, 82, 85, 88, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
Total time elapsed: 0.3858218193054199 seconds


Now, dump it all into one **DLC-conform pandas (.h5)** file

In [8]:
#example_DLC_df = pd.read_hdf("I:/FARTS/DeepLabCut-Multi-Animal/multi_ant_test_label-Fabi-2021-07-23/labeled-data/multi_animal_1080p/CollectedData_Fabi.h5")
#print(example_DLC_df.columns.get_level_values(2))
#example_DLC_df

In [9]:
# now let's create the required hierarchy
# number of coordinate columns; taken from the array's shape so that this also
# works when all_points has no rows left
coords_per_individual = all_points.shape[1]

# the generator is the only scorer: one entry per column
scorer = ["FARTS"] * coords_per_individual

# one name per individual ("id_<class>_num_<index>"), repeated for each of its columns
individuals = []
labels_per_individual = int((len(labels)-4) - len(matched_labels))
for ind in range(len(colony['ID'])):
    individual_name = "id_" + str(colony["weight"][ind]) + "_num_" + str(ind)
    individuals.extend([individual_name] * labels_per_individual)

# label names without the ".X" / ".Y" suffix; omitted labels and the four
# leading bounding box entries are left out
omitted = set(omit_labels)
bodyparts_filtered = [item.split(".")[0] for item in labels if item not in omitted][4:]
# the body part list is repeated once per individual
bodyparts = bodyparts_filtered * len(colony['ID'])

# alternating "x", "y" - one pair per body part
coords = ["x", "y"] * (coords_per_individual // 2)

Now that all elements for the **Multi-Index** hierarchy are defined, we can combine them into the **final dataframe**

In [10]:
# Build the three-level column index (scorer / bodyparts / coords) and wrap
# all_points in a dataframe with one row per exported image.
categories = [scorer, bodyparts, coords]
# zip() stops at the shortest of the three lists, so one tuple per position is
# produced only for as many positions as the shortest list has
categories_tuples = list(zip(*categories))
columns = pd.MultiIndex.from_tuples(categories_tuples, names=["scorer",
                                                           "bodyparts",
                                                           "coords"])

# rows are labelled with the output image paths
final_dataframe = pd.DataFrame(all_points, index = output_file_names, columns=columns)
# convert all zeros, negative values, and those exceeding the cropped image to NaN
final_dataframe[final_dataframe < 0] = 0 # set negative values to 0
if resize_resolution is not None:
    # with resizing enabled, values above resize_resolution are zeroed as well
    final_dataframe[final_dataframe > resize_resolution] = 0
# every 0 (original or set above) becomes NaN
final_dataframe = final_dataframe.replace(0, np.nan)

In [11]:
# bare expression: shown as the cell output when run in a notebook
final_dataframe

scorer,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS,FARTS
bodyparts,b_t,b_t,b_a_1,b_a_1,b_a_2,b_a_2,b_a_3,b_a_3,b_a_4,b_a_4,...,ma_l_end,ma_l_end,an_1_l,an_1_l,an_2_l,an_2_l,an_3_l,an_3_l,an_3_l_end,an_3_l_end
coords,x,y,x,y,x,y,x,y,x,y,...,x,y,x,y,x,y,x,y,x,y
example_data/DLC_HERO/10_Img_id2_synth.png,163.27,140.99,132.66,160.6,126.71,164.9,114.94,171.95,101.67,175.42,...,167.6,261.58,127.37,138.8,83.5,65.34,144.21,130.37,124.04,42.96
example_data/DLC_HERO/10_Img_id5_synth.png,165.35,177.37,135.92,142.38,120.3,133.23,112.29,128.1,107.31,128.03,...,207.14,180.13,179.46,187.3,184.0,170.87,174.95,193.48,169.39,184.3
example_data/DLC_HERO/10_Img_id6_synth.png,102.25,162.54,146.03,152.02,151.34,151.43,156.66,151.47,164.5,157.26,...,100.79,180.72,73.08,182.16,71.18,209.85,56.99,191.46,27.92,192.18
example_data/DLC_HERO/10_Img_id7_synth.png,89.62,165.07,136.99,178.68,141.61,182.05,149.96,184.26,158.58,186.17,...,86.04,161.17,67.4,164.92,50.11,189.47,36.66,169.82,36.44,186.6
example_data/DLC_HERO/1_Img_id0_synth.png,213.42,149.94,180.64,136.97,158.05,124.61,113.5,109.84,92.22,102.48,...,223.85,157.39,231.01,152.99,252.62,151.31,285.56,159.27,293.86,168.98
example_data/DLC_HERO/1_Img_id1_synth.png,145.07,94.35,156.88,147.77,162.5,153.37,167.27,163.2,163.76,172.64,...,120.03,80.2,128.08,66.75,106.16,48.27,80.04,51.16,83.03,36.77
example_data/DLC_HERO/1_Img_id2_synth.png,156.15,140.59,169.83,114.86,172.26,109.43,176.26,97.23,181.02,82.98,...,88.27,49.85,185.75,129.45,259.26,170.31,176.47,142.67,238.09,203.8
example_data/DLC_HERO/1_Img_id4_synth.png,201.14,150.89,168.34,142.68,145.73,134.31,107.66,117.61,88.48,110.59,...,208.41,149.0,216.78,147.46,237.41,142.26,251.69,139.33,246.13,152.71
example_data/DLC_HERO/1_Img_id5_synth.png,158.36,106.39,145.81,147.58,149.56,154.34,152.63,155.86,148.01,163.55,...,133.12,78.92,164.73,75.66,152.74,61.19,164.61,60.45,135.74,45.89
example_data/DLC_HERO/1_Img_id6_synth.png,114.3,127.46,161.74,146.64,181.77,145.94,193.34,154.22,203.28,162.87,...,76.78,156.43,92.54,124.63,98.85,139.49,82.53,159.73,62.45,169.35


In [12]:
# write the dataframe as .csv ...
final_dataframe.to_csv(os.path.join(target_dir, "CollectedData_FARTS.csv"))

# IF the function below fails, this is likely due to exceeding the number of columns supported by HDF5 files!
# Restrict the number of simulated animals to < 20 if the goal is to train a DLC network

# ... and as .h5; the key is passed by keyword because passing it positionally
# is deprecated in recent pandas versions
final_dataframe.to_hdf(
    os.path.join(target_dir, "CollectedData_" + "FARTS" + ".h5"),
    key="df_with_missing",
    format="table",
    mode="w")
    