In [None]:
from pathlib import Path
# Directory (relative to the current working directory) that is searched
# for the '*.txt' files read by the cells below.
DIR_OUTPUT = Path('./testOutput')

In [None]:
# Print the bare file name of every '.txt' file found directly in DIR_OUTPUT.
dir_contents = DIR_OUTPUT.glob('*.txt')
for dc in dir_contents:
    print(dc.name)

In [None]:
def parse_synthetic_txt(path):
    '''
    Parses the text files generated by the synthetic data pipeline that are
    found in the given directory.

    Structure of text file:
        - each row corresponds to a tool in the image
        - values in row are whitespace separated
        - first entry in row is the class label
        - rest of entries are integer bounding box coordinates in COCO format (x, y, w, h)
        - blank rows are ignored
    Input:
        - path (Path object or str): directory where text files are found
    Output:
        - data_dict (dict): bounding box coordinates and labels for each image
            - Key: file_name of image w/o extension
                - sub-dictionary with Keys:
                - labels (list of str)
                - bbox (list of tuples of int)
        - unique_labels (set): unique set of labels found in directory
    Raises:
        - ValueError: if a bounding box entry cannot be converted to int
    '''
    data_dict = dict()
    unique_labels = set()

    # Wrapping in Path() lets callers pass a plain string as well.
    directory = Path(path)
    for txt_path in directory.glob('*.txt'):
        # stem removes the '.txt' suffix without relying on its length.
        image_key = txt_path.stem

        # glob already yields the full path, so it can be opened directly.
        with open(txt_path, 'r') as f:
            for line in f:
                # split() without an argument collapses runs of whitespace and
                # returns [] for a blank row, which would otherwise be recorded
                # as an empty-string label with an empty bbox.
                meta_tool = line.split()
                if not meta_tool:
                    continue

                label = meta_tool[0]
                bbox = tuple(int(num) for num in meta_tool[1:])

                # Create the per-image entry on first use, then append to it.
                entry = data_dict.setdefault(image_key, {'labels': [], 'bbox': []})
                entry['labels'].append(label)
                entry['bbox'].append(bbox)
                unique_labels.add(label)

    return data_dict, unique_labels

# Parse every annotation file in DIR_OUTPUT and show both results,
# one per line.
data_dict, unique_labels = parse_synthetic_txt(DIR_OUTPUT)
print(data_dict, unique_labels, sep='\n')

In [None]:
def synthetic_labels2ints(data, unique_labels):
    '''
    YOLO model requires class labels to be integers, so convert the unique set of labels to integer classes

    Input:
        - data (dict): bounding box coordinates and labels for each image
            - Key: file_name of image w/o extension
                - sub-dictionary with Keys:
                - labels
                - bbox
        - unique_labels (set or ordered iterable): labels to assign integer classes to
            - a set/frozenset is sorted (by string value) so the same labels
              always receive the same integers
            - any other iterable (e.g. a list) keeps the order it was given in;
              if a label is repeated, its first position is used

    Output:
        - (dict): bounding box coordinates and labels as integers for each image
            - Key: file_name of image w/o extension
                - sub-dictionary with Keys:
                - labels (tuple): integer class per label; a label that is not
                  in unique_labels is passed through unchanged
                - bbox (tuple)
    '''
    # Iteration order of a set of strings can differ between interpreter runs,
    # which would make the label -> integer assignment unstable.
    if isinstance(unique_labels, (set, frozenset)):
        ordered_labels = sorted(unique_labels, key=str)
    else:
        ordered_labels = list(unique_labels)

    # Build the lookup table once; setdefault keeps the first index of a
    # repeated label.
    label_to_int = dict()
    for i, label in enumerate(ordered_labels):
        label_to_int.setdefault(label, i)

    data_dict = dict()
    for key, value in data.items():
        # Translate every label in a single pass so a value that has already
        # been converted can never be matched (and re-mapped) a second time.
        labels_ints = tuple(label_to_int.get(label, label) for label in value['labels'])

        data_dict[key] = {'labels': labels_ints,
                          'bbox': tuple(value['bbox'])
                          }
    return data_dict

# Convert the parsed string labels to integer classes and show the converted
# data followed by the label set, one per line.
data_dict2 = synthetic_labels2ints(data_dict, unique_labels)
print(data_dict2, unique_labels, sep='\n')