# Read all labels from train and validation label sets

In [5]:
import json
import os
import math
from time import time

In [6]:
def loadReducedLabelsFrom(path): # directory path
    if os.path.isdir(path) and ("TL_reduced" in path or "VL_reduced" in path):
        with os.scandir(path) as entries:
            labels = []
            start = time()
            print(f"Load all reduced json files in '{path}'")
            for entry in entries:
                if os.path.isfile(entry) and ".json" in entry.name:
                    with open(path + entry.name, 'r', encoding="UTF-8") as file:
                        label = json.load(file)
                        labels.append((path, label))
        end = time()
        print(f"Finish loading all reduced json files in '{path}', elapsed = {end - start:.2f} sec(s)")
        return labels
    else:
        error_msg = "ASSERTION FAILED:"
        if not os.path.isdir(path):
            error_msg += f" Path to label files must be a directory!\n"
        elif not ("TL_reduced" in path or "VL_reduced" in path):
            error_msg += f" Path must have a reserved name ended with 'TL_reduced' or 'VL_reduced'!\n"
        raise AssertionError(error_msg)

In [9]:
root = './../dataset/labels/'
labels = []
print(f"Start loading files")
print("--------------------------------------------------------------------------------------")
start = time()
with os.scandir(root) as dirs:
    for dir in dirs:
        if os.path.isdir(dir) and (dir.name == "train" or dir.name == "val"):
            type = "TL" if dir.name == "train" else "VL"
            path_reduced = root + dir.name + "/" + type + "_reduced/"
            with os.scandir(path_reduced) as items:
                for item in items:
                    if not 'txt' in item.name:
                        labels.extend(loadReducedLabelsFrom(path_reduced + item.name + '/'))
end = time()
print(f"Finish loading files, elapsed = {end - start:.2f} sec(s)")
print("--------------------------------------------------------------------------------------")

Start loading files
--------------------------------------------------------------------------------------
Load all reduced json files in './../dataset/labels/train/TL_reduced/TL_blouse_reduced/'
Finish loading all reduced json files in './../dataset/labels/train/TL_reduced/TL_blouse_reduced/', elapsed = 29.52 sec(s)
Load all reduced json files in './../dataset/labels/train/TL_reduced/TL_bottom_reduced/'
Finish loading all reduced json files in './../dataset/labels/train/TL_reduced/TL_bottom_reduced/', elapsed = 100.99 sec(s)
Load all reduced json files in './../dataset/labels/train/TL_reduced/TL_cardigan_reduced/'
Finish loading all reduced json files in './../dataset/labels/train/TL_reduced/TL_cardigan_reduced/', elapsed = 26.47 sec(s)
Load all reduced json files in './../dataset/labels/train/TL_reduced/TL_coat_reduced/'
Finish loading all reduced json files in './../dataset/labels/train/TL_reduced/TL_coat_reduced/', elapsed = 36.04 sec(s)
Load all reduced json files in './../dataset

# Convert json files into yolov8 text files

In [15]:
# Return an normalized bounding-box of arbitrary points.
def bbox(points, point_count, image_width, image_height):
    # Find min and max points represented by integer numbers.
    min_x = math.inf
    min_y = min_x
    max_x = -min_x
    max_y = -min_y
    for i in range(int(point_count)):
        x, y = points[2*i], points[2*i+1]
        min_x = x if x <= min_x else min_x
        max_x = x if x > max_x else max_x
        min_y = y if y <= min_y else min_y
        max_y = y if y > max_y else max_y
    # Calculate a bbox center, a bbox width, and a bbox height.
    center_x = 0.5 * (min_x + max_x)
    center_y = 0.5 * (min_y + max_y)
    box_width = max_x - min_x
    box_height = max_y - min_y
    # Normalize the center and the bbox dimensions.
    nx = center_x / image_width
    ny = center_y / image_height
    w = box_width / image_width
    h = box_height / image_height
    return (nx, ny, w, h)

# Class candidates

    fiber composition =: [cotton | hemp | ... | synthetic fiber others]  
    
    washing method =: [hand washing30 | washing30 | ... | washing95]
    
    drycleaning =: [dryclean | dryclean : laundry | dryclean : petroleum solvent only]
    
    

# Version 1: Combined classes

In [10]:
# Version 1
fibers = {
    # fiber composition
    'cotton': 0,
    'hemp': 1,
    'cellulose fiber others': 2,
    'silk': 3,
    'wool': 4,
    'protein fiber others': 5,
    'viscos rayon': 6,
    'regenerated fiber others': 7,
    'polyester': 8,
    'nylon': 9,
    'polyurethane': 10,
    'synthetic fiber others': 11,
}
methods = {
    # washing method
    'hand washing30': 0,
    'washing30': 1,
    'washing40': 2,
    'washing60': 3,
    'washing95': 4,
                        
    # drycleaning
    'dryclean': 5,
    'dryclean : laundry': 6,
    'dryclean : petroleum solvent only': 7
}

In [None]:
# Version 1
def convert(label, bbox):
    fiber_composition = list(set(label['fiber_composition'].split(",")))
    washing_method = label['washing_method'].lower().strip()
    drycleaning = label['drycleaning'].lower().strip()
    bbox_str = " ".join([str(bbox[0]), str(bbox[1]), str(bbox[2]), str(bbox[3])])
    
    txt = ""
    nr_methods = len(methods)
    for fiber in fiber_composition:
        f = fiber.lower().strip()
        if washing_method != 'do not washing' and washing_method != 'none':
            washing_method = 'washing40' if washing_method == 'washing40_1' else washing_method
            m = washing_method
        else:
            m = drycleaning
        txt += str(nr_methods * fibers[f] + methods[m]) + " " + bbox_str + "\n"

    return txt

# Version 2: ordinary object detection of multi-label classification

In [35]:
# Version 2/3: individual classes
methods = {
    # fiber composition
    'cotton': 0,
    'hemp': 1,
    'cellulose fiber others': 2,
    'silk': 3,
    'wool': 4,
    'protein fiber others': 5,
    'viscos rayon': 6,
    'regenerated fiber others': 7,
    'polyester': 8,
    'nylon': 9,
    'polyurethane': 10,
    'synthetic fiber others': 11,
    
    # washing method
    'hand washing30': 12,
    'washing30': 13,
    'washing40': 14,
    'washing60': 15,
    'washing95': 16,
                        
    # drycleaning
    'dryclean': 17,
    'dryclean : laundry': 18,
    'dryclean : petroleum solvent only': 19
}

In [36]:
# Version 2
def convert(label, bbox):
    fiber_composition = list(set(label['fiber_composition'].split(",")))
    washing_method = label['washing_method'].lower().strip()
    drycleaning = label['drycleaning'].lower().strip()
    bbox_str = ' '.join([str(bbox[0]), str(bbox[1]), str(bbox[2]), str(bbox[3])])
    
    txt = ''
    if washing_method != 'do not washing' and washing_method != 'none':
        washing_method = 'washing40' if washing_method == 'washing40_1' else washing_method
        m = washing_method
    else:
        m = drycleaning
    txt += str(methods[m]) + ' ' + bbox_str + '\n'
    for fiber in fiber_composition:
        f = fiber.lower().strip()
        txt += str(methods[f]) + ' ' + bbox_str + '\n'
    
    return txt

# Version 3: Multi-label object detection (not supported by YOLOv8 now)

In [28]:
# Version 3
def convert(label, bbox):
    fiber_composition = list(set(label['fiber_composition'].split(",")))
    washing_method = label['washing_method'].lower().strip()
    drycleaning = label['drycleaning'].lower().strip()
    bbox_str = ' '.join([str(bbox[0]), str(bbox[1]), str(bbox[2]), str(bbox[3])])
    
    txt = ''
    if washing_method != 'do not washing' and washing_method != 'none':
        washing_method = 'washing40' if washing_method == 'washing40_1' else washing_method
        m = washing_method
    else:
        m = drycleaning
    txt += str(methods[m]) + ' ' + bbox_str + ' '
    for fiber in fiber_composition:
        f = fiber.lower().strip()
        txt += str(methods[f]) + ' '
    txt += '\n'
    
    return txt

# Version 4: Sequential training

In [33]:
# Version 4
fibers = {
    # fiber composition
    'cotton': 0,
    'hemp': 1,
    'cellulose fiber others': 2,
    'silk': 3,
    'wool': 4,
    'protein fiber others': 5,
    'viscos rayon': 6,
    'regenerated fiber others': 7,
    'polyester': 8,
    'nylon': 9,
    'polyurethane': 10,
    'synthetic fiber others': 11,
}
methods = {
    # washing method
    'hand washing30': 0,
    'washing30': 1,
    'washing40': 2,
    'washing60': 3,
    'washing95': 4,
                        
    # drycleaning
    'dryclean': 5,
    'dryclean : laundry': 6,
    'dryclean : petroleum solvent only': 7
}

In [38]:
# Version 4
def convert(label, bbox, type):
    if type == 'f':
        return convertFibers(label, bbox)
    elif type == 'm':
        return convertMethods(label, bbox)
    else:
        exit(0)

# for methods
def convertMethods(label, bbox):
    fiber_composition = list(set(label['fiber_composition'].split(",")))
    washing_method = label['washing_method'].lower().strip()
    drycleaning = label['drycleaning'].lower().strip()
    bbox_str = ' '.join([str(bbox[0]), str(bbox[1]), str(bbox[2]), str(bbox[3])])
    
    txt = ''
    if washing_method != 'do not washing' and washing_method != 'none':
        washing_method = 'washing40' if washing_method == 'washing40_1' else washing_method
        m = washing_method
    else:
        m = drycleaning
    txt += str(methods[m]) + ' ' + bbox_str + '\n'
    
    return txt

# for fibers
def convertFibers(label, bbox):
    fiber_composition = list(set(label['fiber_composition'].split(",")))
    washing_method = label['washing_method'].lower().strip()
    drycleaning = label['drycleaning'].lower().strip()
    bbox_str = ' '.join([str(bbox[0]), str(bbox[1]), str(bbox[2]), str(bbox[3])])
    
    txt = ''
    for fiber in fiber_composition:
        f = fiber.lower().strip()
        txt += str(fibers[f]) + ' ' + bbox_str + '\n'
    
    return txt

In [39]:
def saveAllTextLabels(labels: dict):
    for root, label in labels:
        idx = root.rfind('/', 0, len(root) - 1)
        labelset_path = root[:idx] + '/'
        directory_name = root[idx + 1: len(root) - 1]
        
        type = 'TL' if 'TL' in directory_name else 'VL'
        bucketDir = labelset_path + type + '_txt/'
        txtDir = bucketDir + directory_name + '_txt/'
        # If the path to contain directories including text files doesn't exist, create it.
        if not os.path.exists(bucketDir):
            os.makedirs(bucketDir)
        # If the path to store text files doesn't exist, create it.
        if not os.path.exists(txtDir):
            os.makedirs(txtDir)
        # Write them in the text path.
        id, name = label['id'], label['name']
        path = txtDir + name + ".txt"
        with open(path, 'w') as f:
            width, height = label['width'], label['height']
            annotation = label['annotation'][0]
            points, point_count = annotation['annotation_point'], annotation['annotation_point_count']
            txt = convert(label, bbox(points, point_count, width, height))
            f.write(txt)

def saveAllTextLabelsInOne(labels: dict, version = None):
    for root, label in labels:
        type = 'train' if 'train' in root else 'val'
        path = root[:root.find(type)] + type + '/reduced/'
        # If the path to store text files doesn't exist, create it.
        if not os.path.exists(path):
            os.makedirs(path)
        # Write them in the path.
        id, name = label['id'], label['name']
        with open(path + name + ".txt", 'w') as f:
            width, height = label['width'], label['height']
            annotation = label['annotation'][0]
            points, point_count = annotation['annotation_point'], annotation['annotation_point_count']
            if version is None:
                txt = convert(label, bbox(points, point_count, width, height))
            else:
                txt = convert(label, bbox(points, point_count, width, height), version)
            f.write(txt)

In [21]:
print(len(labels))

186225


In [40]:
root = './../dataset/labels/'
print(f"Start converting all json files in {root}")
start = time()
with os.scandir(root) as dirs:
    for dir in dirs:
        if os.path.isdir(dir) and (dir.name == "train" or dir.name == "val"):
            saveAllTextLabelsInOne(labels, 'm')
end = time()
print(f"Finish converting all json files in {root}, elapsed = {end - start:.2f} sec(s)")

Start converting all json files in ./../dataset/labels/
Finish converting all json files in ./../dataset/labels/, elapsed = 92.41 sec(s)


# Create a yaml file based on the class list

In [4]:
# Version 1
import yaml

fibers = ['cotton', 'hemp', 'cellulose fiber others',
          'silk', 'wool', 'protein fiber others',
          'viscos rayon', 'regenerated fiber others', 'polyester',
          'nylon', 'polyurethane', 'synthetic fiber others']
methods = ['hand washing30', 'washing30', 'washing40', 'washing60', 'washing95',
           'dryclean', 'dryclean : laundry', 'dryclean : petroleum solvent only']

names = []
for f in fibers:
    for m in methods:
        names.append(f + '/' + m)

data = {
    'train': os.getcwd() + '/dataset/images/train/reduced/',
    'val': os.getcwd() + '/dataset/images/val/reduced/',
    'names': names,
    'nc': len(names)
}

with open('./dataset/data.yaml', 'w') as f:
    yaml.dump(data, f)

with open('./dataset/data.yaml', 'r') as f:
    cls = yaml.safe_load(f)
    display(cls)

{'names': ['cotton/hand washing30',
  'cotton/washing30',
  'cotton/washing40',
  'cotton/washing60',
  'cotton/washing95',
  'cotton/dryclean',
  'cotton/dryclean : laundry',
  'cotton/dryclean : petroleum solvent only',
  'hemp/hand washing30',
  'hemp/washing30',
  'hemp/washing40',
  'hemp/washing60',
  'hemp/washing95',
  'hemp/dryclean',
  'hemp/dryclean : laundry',
  'hemp/dryclean : petroleum solvent only',
  'cellulose fiber others/hand washing30',
  'cellulose fiber others/washing30',
  'cellulose fiber others/washing40',
  'cellulose fiber others/washing60',
  'cellulose fiber others/washing95',
  'cellulose fiber others/dryclean',
  'cellulose fiber others/dryclean : laundry',
  'cellulose fiber others/dryclean : petroleum solvent only',
  'silk/hand washing30',
  'silk/washing30',
  'silk/washing40',
  'silk/washing60',
  'silk/washing95',
  'silk/dryclean',
  'silk/dryclean : laundry',
  'silk/dryclean : petroleum solvent only',
  'wool/hand washing30',
  'wool/washing30'

In [32]:
# Version 2/3
import yaml

fibers = ['cotton', 'hemp', 'cellulose fiber others',
          'silk', 'wool', 'protein fiber others',
          'viscos rayon', 'regenerated fiber others', 'polyester',
          'nylon', 'polyurethane', 'synthetic fiber others']
methods = ['hand washing30', 'washing30', 'washing40', 'washing60', 'washing95',
           'dryclean', 'dryclean : laundry', 'dryclean : petroleum solvent only']
names = fibers + methods

data = {
    'train': os.getcwd() + '/dataset/images/train/resampled/',
    'val': os.getcwd() + '/dataset/images/val/resampled/',
    'names': names,
    'nc': len(names),
}

with open('./dataset/data.yaml', 'w') as f:
    yaml.dump(data, f)

with open('./dataset/data.yaml', 'r') as f:
    cls = yaml.safe_load(f)
    display(cls)

{'names': ['cotton',
  'hemp',
  'cellulose fiber others',
  'silk',
  'wool',
  'protein fiber others',
  'viscos rayon',
  'regenerated fiber others',
  'polyester',
  'nylon',
  'polyurethane',
  'synthetic fiber others',
  'hand washing30',
  'washing30',
  'washing40',
  'washing60',
  'washing95',
  'dryclean',
  'dryclean : laundry',
  'dryclean : petroleum solvent only'],
 'nc': 20,
 'train': 'd:\\source\\jupyter\\course\\project/dataset/images/train/reduced/',
 'val': 'd:\\source\\jupyter\\course\\project/dataset/images/val/reduced/'}

In [1]:
# Version 4: sequential training (fiber -> method / method -> fiber)
import yaml
import os

fibers = ['cotton', 'hemp', 'cellulose fiber others',
          'silk', 'wool', 'protein fiber others',
          'viscos rayon', 'regenerated fiber others', 'polyester',
          'nylon', 'polyurethane', 'synthetic fiber others']
methods = ['hand washing30', 'washing30', 'washing40', 'washing60', 'washing95',
           'dryclean', 'dryclean : laundry', 'dryclean : petroleum solvent only']

fiber = {
    'train': os.getcwd() + '/../dataset/images/train/resampled/',
    'val': os.getcwd() + '/../dataset/images/val/resampled/',
    'names': fibers,
    'nc': len(fibers),
}
method = {
    'train': os.getcwd() + '/../dataset/images/train/resampled/',
    'val': os.getcwd() + '/../dataset/images/val/resampled/',
    'names': methods,
    'nc': len(methods),
}

with open('./../dataset/fiber.yaml', 'w') as f:
    yaml.dump(fiber, f)

with open('./../dataset/fiber.yaml', 'r') as f:
    cls = yaml.safe_load(f)
    display(cls)

with open('./../dataset/method.yaml', 'w') as f:
    yaml.dump(method, f)

with open('./../dataset/method.yaml', 'r') as f:
    cls = yaml.safe_load(f)
    display(cls)

{'names': ['cotton',
  'hemp',
  'cellulose fiber others',
  'silk',
  'wool',
  'protein fiber others',
  'viscos rayon',
  'regenerated fiber others',
  'polyester',
  'nylon',
  'polyurethane',
  'synthetic fiber others'],
 'nc': 12,
 'train': 'D:\\source\\jupyter\\course\\project\\conversion/../dataset/images/train/resampled/',
 'val': 'D:\\source\\jupyter\\course\\project\\conversion/../dataset/images/val/resampled/'}

{'names': ['hand washing30',
  'washing30',
  'washing40',
  'washing60',
  'washing95',
  'dryclean',
  'dryclean : laundry',
  'dryclean : petroleum solvent only'],
 'nc': 8,
 'train': 'D:\\source\\jupyter\\course\\project\\conversion/../dataset/images/train/resampled/',
 'val': 'D:\\source\\jupyter\\course\\project\\conversion/../dataset/images/val/resampled/'}