In [1]:
import os
from PIL import Image
import cv2
from tqdm.notebook import tqdm

In [2]:
# Root of the source dataset; convert_to_yolotxt reads its 'Image' and
# 'Annotation' subfolders.
original_root = 'Original/'
# Output root; convert_to_yolotxt creates 'images' and 'labels' subfolders here.
target_root = 'TextDetection/Train'

In [3]:
def _visible_entries(directory):
    """Return the non-hidden entry names of ``directory`` in sorted order."""
    return sorted(name for name in os.listdir(directory) if not name.startswith('.'))


def _pair_by_stem(image_names, label_names):
    """Pair image and label file names that share a base name (extension ignored).

    Returns ``(pairs, unmatched)`` where ``pairs`` is a list of
    ``(image_name, label_name)`` tuples in the order of ``image_names`` and
    ``unmatched`` is the number of images that have no label counterpart.
    """
    labels_by_stem = {os.path.splitext(name)[0]: name for name in label_names}
    pairs = []
    unmatched = 0
    for image_name in image_names:
        label_name = labels_by_stem.get(os.path.splitext(image_name)[0])
        if label_name is None:
            unmatched += 1
        else:
            pairs.append((image_name, label_name))
    return pairs, unmatched


def _annotation_to_yolo(line, img_width, img_height):
    """Convert one annotation line into a normalized YOLO box.

    The first four whitespace-separated tokens are read as x coordinates and
    the next four as y coordinates of a quadrilateral; any further tokens are
    ignored. Returns ``(center_x, center_y, width, height)`` normalized by the
    image size, or ``None`` when the line cannot be parsed or the box has no
    area once clamped to the image.
    """
    tokens = line.split()
    if len(tokens) < 8:
        return None
    try:
        xs = [int(float(tok)) for tok in tokens[:4]]
        ys = [int(float(tok)) for tok in tokens[4:8]]
    except (ValueError, OverflowError):
        return None

    # Clamp to the image on both sides so normalized values stay within [0, 1].
    xs = [min(max(x, 0), img_width) for x in xs]
    ys = [min(max(y, 0), img_height) for y in ys]

    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    box_width, box_height = right - left, bottom - top
    if box_width <= 0 or box_height <= 0:
        return None

    return (
        (left + right) / 2 / img_width,
        (top + bottom) / 2 / img_height,
        box_width / img_width,
        box_height / img_height,
    )


def convert_to_yolotxt(source_root, target_root, class_id=0):
    """Convert quadrilateral text annotations into a YOLO detection dataset.

    Reads ``<source_root>/Image/<subfolder>/*`` and
    ``<source_root>/Annotation/<subfolder>/*``. Every image that has a label
    file with the same base name is written to
    ``<target_root>/images/<n>.jpg`` together with
    ``<target_root>/labels/<n>.txt``, where ``n`` is a running counter starting
    at 0. Each label line is ``class_id center_x center_y width height`` with
    the four box values normalized by the image size; the box is the
    axis-aligned bounding box of the annotated quadrilateral.

    Subfolders and files are processed in sorted order and matched by name
    rather than by directory-listing position, so output numbering is
    reproducible and a missing or extra file cannot shift the pairing.
    NOTE(review): this assumes image and label subfolders share names and that
    each image and its label file share a base name -- confirm against the
    dataset layout. Anything left unmatched is skipped and reported.

    Args:
        source_root: Directory containing the ``Image`` and ``Annotation`` trees.
        target_root: Directory that receives ``images`` and ``labels``.
        class_id: Class index written at the start of every label line.

    Returns:
        None. Prints a one-line summary only when something was skipped.
    """
    # Define source paths
    src_images_dir = os.path.join(source_root, 'Image')
    src_labels_dir = os.path.join(source_root, 'Annotation')

    # Define target paths
    tar_images_dir = os.path.join(target_root, 'images')
    tar_labels_dir = os.path.join(target_root, 'labels')

    # Create target directory structure
    os.makedirs(tar_images_dir, exist_ok=True)
    os.makedirs(tar_labels_dir, exist_ok=True)

    # Keep only subfolders that exist as directories on both sides.
    image_subfolders = [
        name for name in _visible_entries(src_images_dir)
        if os.path.isdir(os.path.join(src_images_dir, name))
    ]
    subfolders = [
        name for name in image_subfolders
        if os.path.isdir(os.path.join(src_labels_dir, name))
    ]
    skipped_folders = len(image_subfolders) - len(subfolders)

    file_ctr = 0
    skipped_images = 0
    skipped_boxes = 0

    for subfolder in tqdm(subfolders, desc='subfolders'):
        img_subfolder_path = os.path.join(src_images_dir, subfolder)
        lbl_subfolder_path = os.path.join(src_labels_dir, subfolder)

        pairs, unmatched = _pair_by_stem(
            _visible_entries(img_subfolder_path),
            _visible_entries(lbl_subfolder_path),
        )
        skipped_images += unmatched

        for img_file, lbl_file in tqdm(pairs, desc=str(subfolder), leave=False):
            img_file_path = os.path.join(img_subfolder_path, img_file)
            lbl_file_path = os.path.join(lbl_subfolder_path, lbl_file)

            # Only the numeric tokens are used, so undecodable bytes in the
            # trailing text must not abort the conversion.
            with open(lbl_file_path, 'r', encoding='utf-8', errors='replace') as f:
                annotation_lines = f.read().splitlines()

            with Image.open(img_file_path) as image:
                yolo_lines = []
                for line in annotation_lines:
                    if not line.strip():
                        # Blank lines (including a wholly empty file) carry no box.
                        continue
                    box = _annotation_to_yolo(line, image.width, image.height)
                    if box is None:
                        skipped_boxes += 1
                        continue
                    yolo_lines.append(
                        f"{class_id} " + " ".join(f"{value:.6f}" for value in box)
                    )

                # JPEG cannot store alpha or palette modes; convert those first.
                if image.mode in ('RGB', 'L'):
                    output_image = image
                else:
                    output_image = image.convert('RGB')
                output_image.save(os.path.join(tar_images_dir, f"{file_ctr}.jpg"))

            # An image without boxes gets an empty label file.
            with open(os.path.join(tar_labels_dir, f"{file_ctr}.txt"), 'w') as f:
                f.write("\n".join(yolo_lines))

            file_ctr += 1

    if skipped_folders or skipped_images or skipped_boxes:
        print(
            f"Skipped {skipped_folders} subfolder(s) without a label counterpart, "
            f"{skipped_images} image(s) without a matching label file and "
            f"{skipped_boxes} malformed or zero-area annotation line(s)."
        )

In [4]:
# Convert the dataset under original_root into YOLO-format files under target_root.
convert_to_yolotxt(original_root, target_root)

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/4703 [00:00<?, ?it/s]

  0%|          | 0/4775 [00:00<?, ?it/s]

  0%|          | 0/4875 [00:00<?, ?it/s]

  0%|          | 0/4577 [00:00<?, ?it/s]

  0%|          | 0/4729 [00:00<?, ?it/s]

  0%|          | 0/4549 [00:00<?, ?it/s]

  0%|          | 0/4745 [00:00<?, ?it/s]

  0%|          | 0/4529 [00:00<?, ?it/s]

  0%|          | 0/4865 [00:00<?, ?it/s]

  0%|          | 0/4251 [00:00<?, ?it/s]

  0%|          | 0/4352 [00:00<?, ?it/s]

  0%|          | 0/4732 [00:00<?, ?it/s]

  0%|          | 0/4525 [00:00<?, ?it/s]

  0%|          | 0/4474 [00:00<?, ?it/s]

  0%|          | 0/4577 [00:00<?, ?it/s]

  0%|          | 0/4669 [00:00<?, ?it/s]

  0%|          | 0/4753 [00:00<?, ?it/s]

  0%|          | 0/4815 [00:00<?, ?it/s]

  0%|          | 0/4636 [00:00<?, ?it/s]

  0%|          | 0/4571 [00:00<?, ?it/s]

  0%|          | 0/4574 [00:00<?, ?it/s]

  0%|          | 0/4587 [00:00<?, ?it/s]

  0%|          | 0/4900 [00:00<?, ?it/s]

  0%|          | 0/4560 [00:00<?, ?it/s]

  0%|          | 0/4809 [00:00<?, ?it/s]

In [6]:
# Sanity check: number of entries in the generated labels directory, derived
# from target_root so it cannot drift from the configured output path.
# NOTE(review): the per-subfolder totals printed by the conversion cell sum to
# 116132, one fewer than the count shown below -- possibly a stray entry or a
# file left over from an earlier run; verify.
len(os.listdir(os.path.join(target_root, 'labels')))

116133