In [1]:
import json
import os
import shutil
import cv2
import threading
import numpy as np
from imantics import Polygons, Mask
import logging
from tqdm import tqdm

In [2]:
def setup_logging(level='info', log_file=None):
    """Configure the root logger with a Rich console handler and an optional file handler.

    The function can be called repeatedly (for example when the notebook cell
    is re-run with different arguments): handlers already attached to the root
    logger are detached and closed before the new configuration is applied.
    ``logging.basicConfig`` on its own does nothing once the root logger has
    handlers, which would make later calls silently ineffective.

    Args:
        level: Logging level, either a level name in any letter case
            (``'info'``, ``'DEBUG'``, ...) or a numeric ``logging`` constant.
        log_file: Optional path of a log file. When given, records are also
            written there as ``time level message file:line``.
    """
    # Imported here so that rich is only required when this function is called.
    from rich.console import Console
    from rich.logging import RichHandler

    if isinstance(level, str):
        level = level.upper()

    # Build the new handlers first: if this fails (e.g. the log file cannot be
    # opened) the existing logging configuration is left untouched.
    handlers = []
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(logging.Formatter(
            '%(asctime)s %(levelname)s %(message)s %(filename)s:%(lineno)d'
        ))
        handlers.append(file_handler)
    handlers.append(RichHandler(console=Console(width=160)))

    # Drop whatever an earlier call installed so basicConfig is not a no-op
    # and previously opened log files are released.
    root = logging.getLogger()
    for stale in list(root.handlers):
        root.removeHandler(stale)
        stale.close()

    logging.basicConfig(level=level, format='%(message)s',
                        datefmt='[%X]', handlers=handlers)
# Console-only logging at INFO level for the rest of the notebook.
setup_logging(level='info', log_file=None)

In [3]:
# Defect categories; a category's position in this list is used as its class index.
class_names = ['powder_uncover', 'powder_uneven', 'scratch']
# Lookup table: category name -> class index.
class_mapping = dict(zip(class_names, range(len(class_names))))

# Root logger (no name passed to getLogger).
logger = logging.getLogger()

def copy_file(src: str, dst: str) -> None:
    """Copy ``src`` to ``dst`` unless ``dst`` already exists.

    Missing parent directories of ``dst`` are created. An existing ``dst`` is
    left untouched (it is never overwritten).

    Args:
        src: Path of the file to copy.
        dst: Destination file path; may be a bare file name, in which case the
            file is created in the current working directory.
    """
    dst_dir = os.path.dirname(dst)
    # os.makedirs('') raises FileNotFoundError, so only create a directory
    # when the destination actually has a directory component.
    if dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
    if not os.path.exists(dst):
        shutil.copy(src, dst)

def gen_yolov5_txt(origin_h: float,
                   origin_w: float,
                   label_file_path: str,
                   txt_file_name: str,
                   txt_file_root: str,
                   cls_idx: int):
    """Convert one annotation JSON file into a YOLOv5-style label text file.

    The JSON file must contain a ``shapes`` list whose items each have a
    ``points`` list of ``[x, y]`` pixel coordinates. For every shape the
    axis-aligned bounding box of its points is written as one line::

        <cls_idx> <center_x> <center_y> <width> <height>

    with the four box values divided by the image width/height. For a shape
    given as two opposite corners (in any order) this is exactly the rectangle
    they span; shapes with more points are reduced to their bounding box.
    Shapes with fewer than two points cannot form a box and are skipped with a
    warning.

    Args:
        origin_h: Height of the source image in pixels.
        origin_w: Width of the source image in pixels.
        label_file_path: Path of the annotation JSON file to read.
        txt_file_name: File name of the label file to write.
        txt_file_root: Directory for the label file; created if missing.
        cls_idx: Class index written at the start of every line.

    Raises:
        FileNotFoundError: If ``label_file_path`` does not exist.
        KeyError: If the JSON has no ``shapes`` key or a shape has no ``points``.
    """
    with open(label_file_path, 'r') as f:
        data = json.load(f)

    os.makedirs(txt_file_root, exist_ok=True)

    lines = []
    for shape in data['shapes']:
        points = shape['points']
        if len(points) < 2:
            logging.getLogger().warning(
                'Skipping shape with fewer than 2 points in %s', label_file_path)
            continue

        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        # min/max makes the result independent of the order of the corners.
        min_x, max_x = min(xs), max(xs)
        min_y, max_y = min(ys), max(ys)

        center_x = ((min_x + max_x) / 2) / origin_w
        center_y = ((min_y + max_y) / 2) / origin_h
        box_w = (max_x - min_x) / origin_w
        box_h = (max_y - min_y) / origin_h

        fields = [int(cls_idx), center_x, center_y, box_w, box_h]
        lines.append(' '.join(str(field) for field in fields) + '\n')

    with open(os.path.join(txt_file_root, txt_file_name), 'w') as f:
        f.writelines(lines)


# Convert every split: copy each image under a running numeric name and write
# the matching label file with the same number.
for split_name in ['Train', 'Val']:
    data_root = os.path.join('./origin_data', split_name)
    new_data_root = os.path.join('./yolov5_data', split_name)
    image_out_root = os.path.join(new_data_root, 'images')
    # NOTE(review): YOLOv5's dataset loader normally looks for a sibling
    # 'labels' directory next to 'images'; confirm that a later step accounts
    # for this directory being called 'label'.
    txt_file_root = os.path.join(new_data_root, 'label')

    # Numbering restarts per split and runs across all classes.
    name_idx = 0
    for cls_idx, cls in enumerate(class_names):
        cls_root = os.path.join(data_root, cls)
        image_root = os.path.join(cls_root, 'image')
        # os.listdir returns entries in arbitrary order; sort so that the
        # image -> number assignment is the same on every run. This matters
        # because copy_file never overwrites an existing destination.
        for name in tqdm(sorted(os.listdir(image_root))):
            src_image_path = os.path.join(image_root, name)

            # Take the size from the source image so it always matches the
            # annotation being converted, even if an older copy already
            # exists at the destination.
            im = cv2.imread(src_image_path)
            if im is None:
                raise ValueError(f'cv2 could not read image: {src_image_path}')
            h, w = im.shape[:2]

            # The file is copied unchanged; only its name becomes '<n>.png'.
            new_name = f'{name_idx}.png'
            copy_file(src_image_path, os.path.join(image_out_root, new_name))

            # splitext keeps dots inside the file name ('a.b.png' -> 'a.b').
            stem = os.path.splitext(name)[0]
            label_file_path = os.path.join(cls_root, 'label', stem + '.json')
            txt_file_name = f'{name_idx}.txt'
            gen_yolov5_txt(h, w, label_file_path, txt_file_name, txt_file_root, cls_idx)
            name_idx += 1

100%|██████████| 100/100 [00:40<00:00,  2.46it/s]
100%|██████████| 100/100 [00:39<00:00,  2.55it/s]
100%|██████████| 100/100 [00:02<00:00, 39.65it/s]
100%|██████████| 50/50 [00:20<00:00,  2.50it/s]
100%|██████████| 50/50 [00:19<00:00,  2.59it/s]
100%|██████████| 50/50 [00:01<00:00, 40.59it/s]
