* https://medium.com/@beyzaakyildiz/what-is-yolov8-how-to-use-it-b3807d13c5ce
* https://docs.ultralytics.com/modes/train/#usage-examples

In [2]:
import scipy.io
import numpy as np
import h5py
import os
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm
import gc
from collections import Counter

In [3]:
def mat_to_dataset(mat_path):
    f = h5py.File(mat_path, mode='r')
    datasets = {}
    files_count = len(f['digitStruct']['name'])
    for i in range(files_count):
        name_uint16 = f[f['digitStruct']['name'][i,0]][:]
        name = ''.join(chr(n[0]) for n in name_uint16)
        
        bbox = {}
        box_i = f[f['digitStruct']['bbox'][i,0]]
        length = box_i['label'].shape[0]
        for key in ['height', 'label', 'left', 'top', 'width']:
            l = []
            if key=='label':
                l = [ int(str(int(f[box_i[key][index,0]][0][0]))[-1]) if length > 1 else int(box_i[key][0][0]) for index in range(length) ]
            else:
                l = [ int(f[box_i[key][index,0]][0][0]) if length > 1 else int(box_i[key][0][0]) for index in range(length) ]
            bbox[key] = l
        datasets[name] = bbox
        print(f'Loading {i} / {files_count}.\r', end='') 
    print() 
    print(f'{i+1} records loaded.') 
    return datasets

In [4]:
TRAIN_ANNOTATION_F = './data/meta_data/train_digitStruct.mat'
TEST_ANNOTATION_F = './data/meta_data/test_digitStruct.mat'

In [5]:
def mat2yolo(file, data_dir):
    gc.collect()
    annots = mat_to_dataset(file)

    images_dir = f'{data_dir}/images'
    labels_dir = f'{data_dir}/labels'
    label_frequency = []
    corrupted_images = []
    for image_name, image_data in tqdm(annots.items()):
        
        image = Image.open(f"{images_dir}/{image_name}")
        img_w, img_h = image.size
        image.close()

        #
        yolo8_img_data = []
        corrupted = False
        for i in range(len(image_data['label'])):
            label = image_data['label'][i]

            if label == 10:
                corrupted_images.append(image_name)
                corrupted = True
                break

            # 0...224 -> 0...1
            norm_tl_xy = image_data['left'][i] / img_w, image_data['top'][i] / img_h
            norm_br_xy =  (image_data['left'][i] + image_data['width'][i]) / img_w, (image_data['top'][i] + image_data['height'][i]) / img_h

            yolo8_img_data.append(' '.join(map(lambda x: str(round(x,3)), [
                label, norm_tl_xy[0], norm_tl_xy[1], 
                norm_br_xy[0], norm_br_xy[1]])))

        #
        if not corrupted:
            label_frequency += image_data['label']
            yolo8_img_txt = image_name.split('.')[0]
            with open(f"{labels_dir}/{yolo8_img_txt}.txt", 'w', encoding='utf-8') as fd:
                yolo8_img_dump = '\n'.join(yolo8_img_data)
                fd.write(yolo8_img_dump)

    print(Counter(label_frequency))
    return corrupted_images

def move_corrupted_images(images, part):
    corrupted_dir = f'./data/corrupted_images/{part}'
    images_dir = f'./data/yolo8_f/{part}/images'
    for image_name in tqdm(images):
        os.rename(f"{images_dir}/{image_name}", f"{corrupted_dir}/{image_name}")

In [5]:
corr_test_images = mat2yolo(TEST_ANNOTATION_F, "./data/yolo8_f/test")
#move_corrupted_images(corr_test_images, 'test')

Loading 13067 / 13068.
13068 records loaded.


100%|██████████| 13068/13068 [00:01<00:00, 11218.46it/s]

Counter({1: 5099, 2: 4149, 3: 2882, 4: 2523, 5: 2384, 7: 2019, 6: 1977, 0: 1733, 8: 1660, 9: 1595})





In [6]:
corr_train_images = mat2yolo(TRAIN_ANNOTATION_F, "./data/yolo8_f/train")
#move_corrupted_images(corr_train_images, 'train')

Loading 33401 / 33402.
33402 records loaded.


100%|██████████| 33402/33402 [00:02<00:00, 11147.85it/s]

Counter({1: 13861, 2: 10585, 3: 8497, 4: 7458, 5: 6882, 6: 5727, 7: 5595, 8: 5045, 0: 4894, 9: 4659})





In [6]:
def draw_annotated_image(name, part):
    images_dir = f"./data/yolo8_f/{part}/images"
    labels_dir = f"./data/yolo8_f/{part}/labels"

    with open(f"{labels_dir}/{name}.txt", 'r', encoding='utf-8') as fd:
        info = fd.readlines() 

    img = Image.open(f"{images_dir}/{name}.png")
    img_copy = img.copy()
    img_w, img_h = img.size
    draw = ImageDraw.Draw(img_copy)

    for i in range(len(info)):
        l, x1, y1, x2, y2 = list(map(float, info[i].strip().split(' ')))
        draw.rectangle((x1*img_w, y1*img_h, x2*img_w, y2*img_h), outline="red", width=1)

    img_copy.show()

In [7]:
draw_annotated_image('648', 'test')

In [13]:
draw_annotated_image('2', 'train')