# 0. Import Dependencies

In [19]:
import os
import yaml
import shutil
from pylabel import importer

# 1. Define Params

In [11]:
# The four lists below are parallel: position i of each one describes the same
# split (val, train, test, in that order).

# COCO annotation file of each split.
ANN_PATHS = [
    "./dataset/annotations/instances_val.json",
    "./dataset/annotations/instances_train.json",
    "./dataset/annotations/instances_test.json",
]
# Source image folder of each split.
IMG_PATHS = [
    "./dataset/images/val",
    "./dataset/images/train",
    "./dataset/images/test",
]
# Folder that receives the exported label files of each split.
OUTPUT_PATHS = [
    "./yolov8-dataset/val/labels",
    "./yolov8-dataset/train/labels",
    "./yolov8-dataset/test/labels",
]
# Per-split YAML file that is deleted after each export.
YAML_FILE = [
    'yolov8-dataset/val/dataset.yaml',
    'yolov8-dataset/train/dataset.yaml',
    'yolov8-dataset/test/dataset.yaml',
]

# Every folder of the yolov8-dataset tree that has to be created.
MK_FOLDERS = [
    "yolov8-dataset/test/images",
    "yolov8-dataset/test/labels",
    "yolov8-dataset/train/images",
    "yolov8-dataset/train/labels",
    "yolov8-dataset/val/images",
    "yolov8-dataset/val/labels",
]

# 2. Create Folders

In [12]:
# Create the folder tree of the YOLOv8 dataset (yolov8-dataset).
# exist_ok=True keeps the cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as one of the folders is already present.
for target in MK_FOLDERS:
    os.makedirs(target, exist_ok=True)

# 3. Convert Annotations (COCO to YOLO)

In [13]:
# Loads one split's COCO annotations together with its images through
# pylabel's ImportCoco and writes them out with ExportToYoloV5.
def coco_to_yolo(annPath, imgPath, outPath, yamlFile):
    """Convert the COCO annotations of one split with pylabel.

    Args:
        annPath: Path of the COCO annotation file, passed to ``ImportCoco``.
        imgPath: Folder of the images the annotations belong to.
        outPath: Folder passed to ``ExportToYoloV5`` as ``output_path``.
        yamlFile: Path of a YAML file that is deleted once the export is done.
            Presumably this is the per-split file the exporter writes;
            confirm against the pylabel version in use.

    Raises:
        FileNotFoundError: If ``yamlFile`` does not exist when it is removed.
    """
    coco_dataset = importer.ImportCoco(path=annPath, path_to_images=imgPath)
    exporter = coco_dataset.export
    exporter.ExportToYoloV5(output_path=outPath)
    # The per-split YAML is not kept; a single dataset.yaml is written later.
    os.remove(yamlFile)

In [14]:
# This function copies the images of a dataset folder into the newly created
# yolov8-dataset folder. Despite its name, the source files stay in place.
def move_img(src_folder, dst_folder):
    """Copy every regular file of ``src_folder`` into ``dst_folder``.

    The files are copied with ``shutil.copy``, so the originals are kept.

    Args:
        src_folder: Folder whose files are copied. Sub-folders are skipped.
        dst_folder: Destination folder; it is created when missing.

    Raises:
        FileNotFoundError: If ``src_folder`` does not exist.
    """
    # List first, so a missing source fails before anything is created.
    entries = os.listdir(src_folder)
    os.makedirs(dst_folder, exist_ok=True)

    for entry in entries:
        src_file = os.path.join(src_folder, entry)
        # os.listdir also returns sub-folders, on which shutil.copy raises.
        if not os.path.isfile(src_file):
            continue
        dst_file = os.path.join(dst_folder, entry)
        shutil.copy(src_file, dst_file)

In [15]:
# Run coco_to_yolo once per split: the parallel path lists are zipped so that
# each call receives the annotation file, image folder, label folder and YAML
# file of the same split.
for ann_path, img_dir, label_dir, yaml_path in zip(ANN_PATHS, IMG_PATHS, OUTPUT_PATHS, YAML_FILE):
    coco_to_yolo(ann_path, img_dir, label_dir, yaml_path)

Exporting files: 100%|██████████| 1800/1800 [00:03<00:00, 564.60it/s]
Exporting files: 100%|██████████| 7200/7200 [00:18<00:00, 392.25it/s]
Exporting files: 100%|██████████| 1800/1800 [00:02<00:00, 617.50it/s]


In [16]:
# Copy the test, train and val images into the yolov8-dataset folder.
for src_dir, dst_dir in (
    ('./dataset/images/test/', './yolov8-dataset/test/images/'),
    ('./dataset/images/train', './yolov8-dataset/train/images'),
    ('./dataset/images/val', './yolov8-dataset/val/images'),
):
    move_img(src_dir, dst_dir)

# 4. Create YAML file

In [22]:
# Contents of the dataset.yaml file used to train the YOLOv8 model.
# `names` lists the classes. The annotations contain no background class, but
# they index bolt and nuts as 1 and 2; training raised an error with that
# numbering, so index 0 is declared here as 'background'.
# `nc` is the number of classes, `path` is the dataset path and
# `test` / `train` / `val` are the image folders of the three splits.
# NOTE(review): `path` is absolute and looks Colab-specific; adjust it when
# training elsewhere.
dataset = dict(
    names=['background', 'bolt', 'nuts'],
    nc=3,
    path="/content/yolov8-dataset",
    test="test/images",
    train="train/images",
    val="val/images",
)

# Location of the YAML file the dictionary is written to.
filename = "./yolov8-dataset/dataset.yaml"

In [23]:
# Write the dataset dictionary defined above to the YAML file at `filename`.
with open(filename, 'w') as yaml_out:
    yaml.dump(dataset, stream=yaml_out)