In [None]:
from PIL import Image
import shutil
import os
import sys
from tqdm import tqdm

# Root of the OIDv4_ToolKit checkout; the data to convert is looked up under
# <oid_d>/OID/Dataset. The location can be overridden without editing the
# notebook by setting the OID_TOOLKIT_DIR environment variable.
oid_d = os.environ.get('OID_TOOLKIT_DIR', '/root/OIDv4_ToolKit/')

In [None]:
def get_image_size(path):
    """Return the ``(width, height)`` of the image stored at ``path``.

    The image is opened inside a context manager so its file handle is
    released before returning; callers move the file right after measuring it.
    """
    with Image.open(path) as img:
        return img.size

def create_dataset_folders(dataset_d):
    """Create the ``images`` and ``labels`` subfolders of ``dataset_d``.

    Missing parent directories are created as well. Folders that already
    exist are left in place, so the function can safely be called again.
    """
    for subfolder in ('images', 'labels'):
        os.makedirs(os.path.join(dataset_d, subfolder), exist_ok=True)

def _oid_line_to_yolo(line, level, img_w, img_h):
    """Convert one label line into a normalised centre/size label line.

    The input is expected to look like ``<class name> x1 y1 x2 y2`` where the
    four numbers are box corners in pixels (presumably left, top, right,
    bottom as written by OIDv4_ToolKit -- confirm against the toolkit). The
    class name may contain spaces, so the box is read from the last four
    fields instead of from fixed positions.

    Args:
        line: Raw text line from a label file.
        level: Class index written as the first field of the output.
        img_w: Image width in pixels, used to normalise x values.
        img_h: Image height in pixels, used to normalise y values.

    Returns:
        The newline-terminated output line ``<level> xc yc w h``, or ``None``
        when ``line`` is blank.

    Raises:
        ValueError: If the line has fewer than five fields or a coordinate is
            not a number.
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) < 5:
        raise ValueError(f'Malformed label line: {line!r}')

    x1, y1, x2, y2 = (float(value) for value in fields[-4:])
    x1 /= img_w
    y1 /= img_h
    x2 /= img_w
    y2 /= img_h

    xc = (x1 + x2) / 2
    yc = (y1 + y2) / 2
    w = x2 - x1
    h = y2 - y1

    return f'{level} {xc} {yc} {w} {h}\n'


def move_to_dataset(class_d, dataset_d, level):
    """Convert one class folder's labels and move its images into the dataset.

    For every ``.txt`` file in ``<class_d>/Label`` the matching
    ``<class_d>/<name>.jpg`` is measured, the label lines are rewritten to
    ``<dataset_d>/labels/<name>.txt`` in normalised ``level xc yc w h`` form,
    and the image is moved to ``<dataset_d>/images``.

    Args:
        class_d: Folder holding the images of one class plus a ``Label``
            subfolder with one text file per image.
        dataset_d: Dataset folder that already contains ``images`` and
            ``labels`` subfolders.
        level: Class index written at the start of every output label line.
    """
    dataset_images_d = os.path.join(dataset_d, 'images')
    dataset_labels_d = os.path.join(dataset_d, 'labels')
    label_d = os.path.join(class_d, 'Label')

    for file in tqdm(os.listdir(label_d), file=sys.stdout):
        stem, ext = os.path.splitext(file)
        if ext != '.txt':
            # Not a label file, so there is no image to pair it with.
            continue

        img_path = os.path.join(class_d, stem + '.jpg')
        img_w, img_h = get_image_size(img_path)

        fin = os.path.join(label_d, file)
        fout = os.path.join(dataset_labels_d, file)
        with open(fin) as infile, open(fout, "w") as outfile:
            for line in infile:
                converted = _oid_line_to_yolo(line, level, img_w, img_h)
                if converted is not None:
                    outfile.write(converted)

        shutil.move(img_path, dataset_images_d)

In [None]:
def _write_train_valid_lists(dataset_d):
    """Write ``train.txt`` and ``valid.txt`` listing the images of a dataset.

    Every file in ``<dataset_d>/images`` is written, one full path per line,
    to exactly one of the two lists. Images are visited in sorted order so
    the split is reproducible between runs.
    """
    images_d = os.path.join(dataset_d, 'images')
    img_list = [os.path.join(images_d, f) for f in sorted(os.listdir(images_d))]

    counter = 1
    # NOTE(review): round(100 / 30) == 3, so three images go to train and the
    # fourth to valid (25% valid). Confirm this is the intended ratio.
    index_test = round(100 / 30)
    with open(os.path.join(dataset_d, 'train.txt'), 'w') as train_file, \
            open(os.path.join(dataset_d, 'valid.txt'), 'w') as valid_file:
        for img in tqdm(img_list, file=sys.stdout):
            if counter == index_test + 1:
                counter = 1
                valid_file.write(f'{img}\n')
            else:
                train_file.write(f'{img}\n')
                counter += 1


def _build_dataset_in_cwd():
    """Build the ``dataset`` folder for the subdirectory that is the cwd.

    Removes any previous ``dataset`` folder, replaces spaces with underscores
    in entry names, converts each class folder with ``move_to_dataset`` and
    finally writes the train/valid lists.
    """
    split_d = os.getcwd()
    dataset_d = os.path.join(split_d, 'dataset')
    if os.path.isdir(dataset_d):
        shutil.rmtree(dataset_d, ignore_errors=True)

    for entry in os.listdir(split_d):
        if " " in entry:
            os.rename(entry, entry.replace(" ", "_"))

    # Sorted so a class name maps to the same index in every subdirectory;
    # a leftover 'dataset' folder must never be treated as a class.
    class_dirs = sorted(
        entry for entry in os.listdir(split_d)
        if os.path.isdir(entry) and entry != 'dataset'
    )

    create_dataset_folders(dataset_d)
    for level, class_dir in enumerate(class_dirs):
        print(f"Processing class: {class_dir}")
        move_to_dataset(class_dir, dataset_d, level)

    print('Creating train and valid files...')
    _write_train_valid_lists(dataset_d)
    print('Done!')


def start():
    """Convert every subdirectory of ``<oid_d>/OID/Dataset`` into a dataset.

    Each subdirectory gets its own ``dataset`` folder containing ``images``,
    ``labels``, ``train.txt`` and ``valid.txt``. Class indices are assigned
    from the sorted class folder names.

    The working directory is changed to ``<oid_d>/OID/Dataset`` and is
    restored to that folder after each subdirectory, so all of them are
    processed rather than only the first one.
    """
    os.chdir(os.path.join(oid_d, "OID", "Dataset"))
    dataset_root = os.getcwd()

    for split_dir in sorted(os.listdir(dataset_root)):
        if not os.path.isdir(os.path.join(dataset_root, split_dir)):
            continue

        os.chdir(os.path.join(dataset_root, split_dir))
        try:
            print(f'Currently in subdirectory: {split_dir}')
            _build_dataset_in_cwd()
        finally:
            # Relative lookups for the next subdirectory depend on being
            # back in the Dataset root.
            os.chdir(dataset_root)

In [None]:
# Run the conversion on the data located under <oid_d>/OID/Dataset.
start()