## Reduce the original labels to only the necessary fields

In [3]:
import json
import os
from time import time

In [1]:
def loadLabelsFrom(path):
    """Load the label JSON files in a directory, reduced to the needed fields.

    Every regular file directly inside ``path`` whose name ends with
    ``.json`` is parsed. A label is dropped when its washing method is
    'Do Not Washing' or 'none' AND its drycleaning value is
    'DO NOT DRYCLEAN' or 'none'; every other label is kept.

    Args:
        path: Directory holding the original label files. A trailing
            separator is optional.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``width``,
        ``height``, ``annotation``, ``fiber_composition``,
        ``washing_method`` and ``drycleaning``.

    Raises:
        AssertionError: If ``path`` is not a directory, or if it contains
            the reserved substring '_reduced'.
        KeyError: If a label file lacks one of the fields read below.
    """
    # Validate up front so the loading logic below stays flat.
    if not os.path.isdir(path):
        raise AssertionError("ASSERTION FAILED: Path to label files must be a directory!\n")
    if "_reduced" in path:
        raise AssertionError(
            f"ASSERTION FAILED: Can't load files from the directory {path} because it has reserved name '_reduced'\n"
        )

    # A label is dropped only when BOTH fields hold one of these values.
    no_washing = ('Do Not Washing', 'none')
    no_drycleaning = ('DO NOT DRYCLEAN', 'none')

    labels = []
    start = time()
    print(f"Load all json files in '{path}'")
    with os.scandir(path) as entries:
        for entry in entries:
            # Match on the extension: a substring test would also accept
            # names such as 'label.json.bak'.
            if not (entry.is_file() and entry.name.endswith(".json")):
                continue
            # entry.path is already joined correctly, whether or not `path`
            # was given with a trailing separator.
            with open(entry.path, 'r', encoding="UTF-8") as file:
                json_object = json.load(file)

            clothes = json_object['metadata.clothes']
            washing_method = clothes['metadata.clothes.washing_method']
            drycleaning = clothes['metadata.clothes.drycleaning']
            if washing_method in no_washing and drycleaning in no_drycleaning:
                continue

            dataset = json_object['dataset']
            labels.append({
                'id': dataset['dataset.id'],
                'name': dataset['dataset.name'],
                'width': dataset['dataset.width'],
                'height': dataset['dataset.height'],
                'annotation': json_object['annotation'],
                'fiber_composition': clothes['metadata.clothes.fiber_composition'],
                'washing_method': washing_method,
                'drycleaning': drycleaning,
            })
    end = time()
    print(f"Finish loading all json files in '{path}', elapsed = {end - start:.2f} sec(s)")
    return labels

In [32]:
def writeLabelsIn(path, labels):
    """Write each label as '<name>.json' into the '<path>_reduced/' sibling.

    The destination is derived from ``path`` by dropping any trailing '/'
    and appending '_reduced/'; it is created if missing. Each label is
    serialized with a 4-space indent into a file named after its 'name'.

    Args:
        path: Directory the labels were loaded from. A trailing '/' is
            optional.
        labels: Iterable of label dicts, each with a 'name' key. ``None``
            makes the call a no-op.
    """
    if labels is None:
        return

    # Stripping the trailing separator first gives the same destination
    # for 'a/b' and 'a/b/'. The previous index arithmetic cut off the last
    # character of the directory name when the slash was missing.
    dest = path.rstrip("/") + "_reduced/"
    os.makedirs(dest, exist_ok=True)

    start = time()
    print(f"Write all json files in '{path}'")
    for label in labels:
        # Serialize before opening so a failure leaves no empty file behind.
        json_object = json.dumps(label, indent=4)
        file_path = dest + label['name'] + '.json'
        # The loader reads these files as UTF-8, so write them the same way.
        with open(file_path, 'w', encoding="UTF-8") as new_json:
            new_json.write(json_object)
    end = time()
    print(f"Finish writing all json files in '{path}', elapsed = {end - start:.2f} sec(s)")

In [30]:
root = "./dataset/labels/"
print(f"Start a batch process which converts all labels in '{root}' into reduced ones.")
print("--------------------------------------------------------------------------------------------------------------------")
start = time()
# Map each label directory (key: path to load from and write next to) to the
# reduced labels loaded from it (value).
write_list = {}
with os.scandir(root) as sets:
    for label_set in sets:  # first level under root, e.g. 'train'
        # A plain file directly under root cannot be scanned; skip it.
        if not label_set.is_dir():
            continue
        with os.scandir(label_set.path) as sub_root:
            for item in sub_root:  # second level, e.g. 'TL_blouse'
                # Skip the 'zip' folder, and skip '*_reduced' output folders
                # from a previous run: loadLabelsFrom rejects those paths.
                if not item.is_dir() or item.name == "zip" or "_reduced" in item.name:
                    continue
                path = root + label_set.name + "/" + item.name + "/"
                write_list[path] = loadLabelsFrom(path)

Start a batch process which converts all labels in './dataset/labels/' into reduced ones.
--------------------------------------------------------------------------------------------------------------------
Load all json files in './dataset/labels/train/TL_blouse/'
Finish loading all json files in './dataset/labels/train/TL_blouse/', elapsed = 6.24 sec(s)
Load all json files in './dataset/labels/train/TL_bottom/'
Finish loading all json files in './dataset/labels/train/TL_bottom/', elapsed = 23.41 sec(s)
Load all json files in './dataset/labels/train/TL_cardigan/'
Finish loading all json files in './dataset/labels/train/TL_cardigan/', elapsed = 5.74 sec(s)
Load all json files in './dataset/labels/train/TL_coat/'
Finish loading all json files in './dataset/labels/train/TL_coat/', elapsed = 6.75 sec(s)
Load all json files in './dataset/labels/train/TL_jacket/'
Finish loading all json files in './dataset/labels/train/TL_jacket/', elapsed = 6.97 sec(s)
Load all json files in './dataset/lab

In [33]:
# Persist every loaded label set next to its source directory, then report
# how long the whole batch (loading and writing) took.
for path, item in write_list.items():
    writeLabelsIn(path, item)
end = time()
print("--------------------------------------------------------------------------------------------------------------------")
print(
    f"Finish a batch process which converts all labels in '{root}' into reduced ones, elapsed = {end - start:.6f} sec(s)."
)

Write all json files in './dataset/labels/train/TL_blouse/'
Finish writing all json files in './dataset/labels/train/TL_blouse/', elapsed = 4.18 sec(s)
Write all json files in './dataset/labels/train/TL_bottom/'
Finish writing all json files in './dataset/labels/train/TL_bottom/', elapsed = 12.85 sec(s)
Write all json files in './dataset/labels/train/TL_cardigan/'
Finish writing all json files in './dataset/labels/train/TL_cardigan/', elapsed = 3.71 sec(s)
Write all json files in './dataset/labels/train/TL_coat/'
Finish writing all json files in './dataset/labels/train/TL_coat/', elapsed = 5.45 sec(s)
Write all json files in './dataset/labels/train/TL_jacket/'
Finish writing all json files in './dataset/labels/train/TL_jacket/', elapsed = 4.88 sec(s)
Write all json files in './dataset/labels/train/TL_jumper/'
Finish writing all json files in './dataset/labels/train/TL_jumper/', elapsed = 6.06 sec(s)
Write all json files in './dataset/labels/train/TL_onepiece_dress/'
Finish writing all 