This notebook generates a dataset for training an Ultralytics YOLO-pose model.

In [5]:
import os
import pandas as pd
import numpy as np
import yaml
import shutil
import cv2
import tqdm

In [6]:
# --- Configuration ---------------------------------------------------------
# Name of the keypoint set to export; used as a key into keypoint_sets.yaml
# and as the suffix of the output directory name.
keypoint_set = 'small_17'
data_root = '../data'
data_path = 'YOLO_' + keypoint_set

# Start from a clean slate: delete any previously generated dataset
# (equivalent to `rm -rf <data_root>/<data_path>`) and recreate the folder.
dataset_dir = f'{data_root}/{data_path}'
shutil.rmtree(dataset_dir, ignore_errors=True)
os.makedirs(dataset_dir, exist_ok=True)

# Load every keypoint set, then keep only the one selected above.
# The file is opened in a `with` block so the handle is closed deterministically.
# NOTE(review): yaml.FullLoader can construct more than plain data types; if
# this config only holds plain lists/dicts, yaml.safe_load would be the safer
# choice -- confirm before switching.
with open('../configs/keypoint_sets.yaml') as config_file:
    keypoint_sets = yaml.load(config_file, Loader=yaml.FullLoader)
keypoints = keypoint_sets[keypoint_set]

In [7]:
def _square_crop(box_x, box_y, box_w, box_h, img_w, img_h):
    """Grow a box to a square around its centre, then clamp it to the image.

    The shorter side is extended to the length of the longer one. After
    clamping to the image bounds the crop may no longer be exactly square.

    Returns:
        Tuple ``(crop_x, crop_y, crop_w, crop_h)`` in pixels.
    """
    crop_x, crop_y, crop_w, crop_h = box_x, box_y, box_w, box_h
    if box_w > box_h:
        crop_y -= (box_w - box_h) // 2
        crop_h = box_w
    else:
        crop_x -= (box_h - box_w) // 2
        crop_w = box_h

    crop_x = max(0, crop_x)
    crop_y = max(0, crop_y)
    crop_w = min(img_w - crop_x, crop_w)
    crop_h = min(img_h - crop_y, crop_h)
    return crop_x, crop_y, crop_w, crop_h


def _keypoint_fields(u, v, crop_x, crop_y, crop_w, crop_h):
    """Return the ``'x y v'`` label fields for one keypoint.

    ``u``/``v`` are pixel coordinates in the full frame. They are shifted into
    the crop and normalised by the crop size. A keypoint that is missing (NaN)
    or that falls outside the crop is written as ``0 0 0`` instead of leaking
    ``nan`` or out-of-range values into the label file, since those are not
    valid normalised coordinates.
    """
    x = (u - crop_x) / crop_w
    y = (v - crop_y) / crop_h
    missing = np.isnan(x) or np.isnan(y)
    if missing or not (0.0 <= x <= 1.0 and 0.0 <= y <= 1.0):
        return '0 0 0'
    return f'{x} {y} 1'


def generate_set(filename):
    """Export one dataset split as cropped images plus YOLO-pose label files.

    Reads ``<data_root>/<filename>.csv`` (column ``img_path``) and
    ``<data_root>/detect.csv`` (columns ``img_path``, ``box_x``, ``box_y``,
    ``box_w``, ``box_h`` and ``<kp>_u`` / ``<kp>_v`` for every entry of the
    module-level ``keypoints``). For each listed image it loads
    ``<data_root>/frames/<img_path>``, crops a square-ish region around the
    detection box, and writes the crop to ``images/`` and a one-line label
    (``0 cx cy w h`` followed by ``x y v`` per keypoint, all normalised by the
    crop size) to ``labels/`` under ``<data_root>/<data_path>/<filename>/``.

    Args:
        filename: Split name, e.g. ``'train'``; selects the CSV to read and
            the output sub-directory.

    Raises:
        KeyError: an image has no row in ``detect.csv``.
        FileNotFoundError: an image could not be read from disk.
        ValueError: the detection box yields an empty crop.
        OSError: the cropped image could not be written.
    """
    set_dir = os.path.join(data_root, data_path, filename)
    images_dir = os.path.join(set_dir, 'images')
    labels_dir = os.path.join(set_dir, 'labels')
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    df = pd.read_csv(os.path.join(data_root, f'{filename}.csv'))
    df_detect = pd.read_csv(os.path.join(data_root, 'detect.csv'))

    # Index detections once instead of scanning the whole frame with a boolean
    # mask for every image. Keeping the first row per img_path matches the
    # previous `.values[0]` behaviour when an image appears more than once.
    detections = df_detect.drop_duplicates(subset='img_path').set_index('img_path')

    for img_path in tqdm.tqdm(df['img_path'], total=len(df)):
        # NOTE(review): outputs are named by basename only, so two images with
        # the same file name in different folders would overwrite each other.
        img_name = os.path.basename(img_path)
        base_name = os.path.splitext(img_name)[0]

        if img_path not in detections.index:
            raise KeyError(f'no detection found in detect.csv for {img_path!r}')
        det = detections.loc[img_path]

        src_path = os.path.join(data_root, 'frames', img_path)
        img = cv2.imread(src_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'could not read image: {src_path}')
        h, w = img.shape[:2]

        box_x = int(det['box_x'])
        box_y = int(det['box_y'])
        box_w = int(det['box_w'])
        box_h = int(det['box_h'])

        crop_x, crop_y, crop_w, crop_h = _square_crop(box_x, box_y, box_w, box_h, w, h)
        if crop_w <= 0 or crop_h <= 0:
            raise ValueError(
                f'empty crop for {img_path!r}: box=({box_x}, {box_y}, {box_w}, {box_h}), '
                f'image size=({w}, {h})'
            )
        img = img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]

        # Box expressed relative to the crop, normalised by the crop size.
        # True division keeps the half pixel of odd-sized boxes in the centre.
        ncx = (box_x - crop_x + box_w / 2) / crop_w
        ncy = (box_y - crop_y + box_h / 2) / crop_h
        nw = box_w / crop_w
        nh = box_h / crop_h

        fields = [f'0 {ncx} {ncy} {nw} {nh}']
        for kp in keypoints:
            fields.append(_keypoint_fields(
                float(det[f'{kp}_u']), float(det[f'{kp}_v']),
                crop_x, crop_y, crop_w, crop_h,
            ))
        label = ' '.join(fields)

        out_img_path = os.path.join(images_dir, img_name)
        if not cv2.imwrite(out_img_path, img):
            # cv2.imwrite reports failure through its return value.
            raise OSError(f'could not write image: {out_img_path}')

        with open(os.path.join(labels_dir, f'{base_name}.txt'), 'w') as f:
            f.write(label)

In [8]:
# Build the training split first, then the validation split.
for split_name in ('train', 'val'):
    generate_set(split_name)

100%|██████████| 59927/59927 [13:53<00:00, 71.91it/s]
100%|██████████| 11968/11968 [02:41<00:00, 74.10it/s]
