In [2]:
import os
import shutil

import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
# Load the annotation table; later cells read its 'class' column and
# access the remaining fields by column position.
annotations_csv = '../data/annotations.csv'
annotations = pd.read_csv(annotations_csv)

In [3]:
# Stage 1: keep 70% of the rows for training and set the other 30% aside,
# stratifying on 'class' so every class keeps the same proportion.
train, holdout = train_test_split(
    annotations,
    test_size=0.3,
    random_state=42,
    stratify=annotations['class'],
)

# Stage 2: cut the 30% holdout in half (again stratified by class), giving
# roughly 15% of the full data for testing and 15% for validation.
test, val = train_test_split(
    holdout,
    test_size=0.5,
    random_state=42,
    stratify=holdout['class'],
)

In [7]:
# Lookup table from class name to its numeric id (the position in the list)
class_names = ['A', 'B', 'E', 'G']
class_dict = dict(zip(class_names, range(len(class_names))))

In [6]:
# Cast annotation format to YOLO format
def pascal_voc_to_yolo(x1, y1, x2, y2, img_width=512, img_height=512):
    """Convert a corner-format box to a normalised centre/size box.

    Args:
        x1, y1: Coordinates of one box corner, in pixels.
        x2, y2: Coordinates of the opposite box corner, in pixels.
        img_width: Image width in pixels used to normalise the x centre and
            the box width. Defaults to 512, the size previously hard-coded.
        img_height: Image height in pixels used to normalise the y centre and
            the box height. Defaults to 512.

    Returns:
        A list ``[x_center, y_center, width, height]`` where every value is
        divided by the corresponding image dimension.
    """
    x_center = (x2 + x1) / (2 * img_width)
    y_center = (y2 + y1) / (2 * img_height)
    box_width = (x2 - x1) / img_width
    box_height = (y2 - y1) / img_height
    return [x_center, y_center, box_width, box_height]


In [7]:
# Save annotation
def save_annotation(row, file):
    """Write one annotation row to ``file`` as a single label line.

    Args:
        row: Indexable record; positions 2-5 hold the box corners passed to
            ``pascal_voc_to_yolo`` as x1, y1, x2, y2 and position 6 holds the
            class name looked up in ``class_dict``.
        file: Writable text file object that receives the line.

    The emitted line is ``"<class id> <x_center> <y_center> <width> <height>"``
    followed by a newline.
    """
    x1, y1, x2, y2 = row[2], row[3], row[4], row[5]
    x_center, y_center, width, height = pascal_voc_to_yolo(x1, y1, x2, y2)
    class_id = class_dict[row[6]]
    label_line = f"{class_id} {x_center} {y_center} {width} {height}\n"
    file.write(label_line)


In [8]:
def create_dataset_split(df, name, data_dir='../data'):
    """Write one dataset split to disk as images plus label files.

    For every row of ``df`` (read positionally through ``df.values``) the
    image ``<data_dir>/<row[0]>/<row[1]>.jpg`` is copied to
    ``<data_dir>/<name>/images/<row[0]>_<row[1]>.jpg`` and the row is written
    with ``save_annotation`` to
    ``<data_dir>/<name>/labels/<row[0]>_<row[1]>.txt``.

    Args:
        df: Annotation DataFrame. Columns 0 and 1 identify the source folder
            and image name; the remaining columns are consumed by
            ``save_annotation``.
        name: Name of the split directory to create, e.g. ``'train'``.
        data_dir: Root directory holding the source folders and receiving the
            split directory. Defaults to ``'../data'``.

    Raises:
        OSError: If a source image cannot be copied (for example it does not
            exist) or a label file cannot be written.
    """
    images_dir = os.path.join(data_dir, name, 'images')
    labels_dir = os.path.join(data_dir, name, 'labels')
    # exist_ok=True also repairs a split directory that exists but is missing
    # one of its sub-directories (e.g. after an interrupted earlier run).
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    # Images already handled in this call, keyed by (folder, image name).
    handled = set()
    for line in df.values:
        image_key = (line[0], line[1])
        stem = '{}_{}'.format(line[0], line[1])

        if image_key in handled:
            # Another row for the same image: add its box to the existing
            # label file instead of overwriting the earlier ones.
            mode = 'a'
        else:
            # First row for this image: copy it and start a fresh label file
            # so re-running the function does not accumulate duplicates.
            source_image = os.path.join(
                data_dir, str(line[0]), '{}.jpg'.format(line[1]))
            # shutil.copy avoids spawning a shell and raises on failure
            # instead of silently leaving a label without its image.
            shutil.copy(source_image, os.path.join(images_dir, stem + '.jpg'))
            handled.add(image_key)
            mode = 'w'

        with open(os.path.join(labels_dir, stem + '.txt'), mode) as file:
            save_annotation(line, file)


In [9]:
# Write each split to disk, in the order train -> test -> val
for split_name, split_df in (('train', train), ('test', test), ('val', val)):
    create_dataset_split(split_df, split_name)