In [132]:
import os
import re
import cv2
import yaml
import shutil
import random
from tqdm import tqdm
from pathlib import Path
from ultralytics import YOLO


In [133]:
IMG_PATH = Path(r'./data/img_dataset')
ANNOTATION_PATH = Path(r'./data/annotation')
train_val_test = (0.8,0.1,0.1)

class_dict = {category_name: idx for idx, category_name in enumerate(os.listdir(IMG_PATH))}
print(class_dict)

{'People': 0, 'Motorbike': 1, 'Cup': 2, 'Bicycle': 3, 'Chair': 4, 'Boat': 5, 'Table': 6, 'Car': 7, 'Bottle': 8, 'Bus': 9, 'Cat': 10, 'Dog': 11}


In [134]:
def clean_file_names():
    DATA_PATH = r'./data'
    for root, _, file_names in os.walk(DATA_PATH):
        if file_names == []:
            continue
        
        for file_name in file_names:
            new_file_name = file_name.lower()
            old_file_path = os.path.join(root, file_name)
            file_path = os.path.join(root, new_file_name)
            os.rename(old_file_path, file_path)

clean_file_names()

In [135]:
def pascal_to_yolo(image_path):
    image_name = os.path.basename(image_path)
    image_dir = os.path.dirname(image_path).replace("\\", "/")
    file_title, _ = os.path.splitext(image_name)
    yolo_anno_path = image_dir.replace("images", "labels") + f"/{file_title}.txt"

    img = cv2.imread(image_path)
    img_height, img_width = img.shape[:2]

    sub_category = image_dir.split("/")[-1]
    path_from_base = os.path.join(sub_category, image_name)
    
    original_annotation_path = os.path.join(ANNOTATION_PATH, path_from_base + ".txt")
    with open(original_annotation_path, 'r') as anno_file:
        pascal_data = anno_file.readlines()[1:]

    pascal_num = list(map(lambda s: s.strip().split(" ")[:5], pascal_data))
    
    yolo_format = []
    for indv_bounding_coordinates in pascal_num:
        indv_bounding_coordinates[0] = class_dict[indv_bounding_coordinates[0]]
        indv_bounding_coordinates[1:] = list(map(int, indv_bounding_coordinates[1:]))
        print(indv_bounding_coordinates)

        class_category = indv_bounding_coordinates[0]
        x_ctr = (indv_bounding_coordinates[1] + indv_bounding_coordinates[3]/2) / img_width
        y_ctr = (indv_bounding_coordinates[2] + indv_bounding_coordinates[4]/2) / img_height
        width = (indv_bounding_coordinates[3]) / img_width
        height = (indv_bounding_coordinates[4]) / img_height
        yolo_format.append([class_category, x_ctr, y_ctr, width, height])
    
    os.makedirs(os.path.dirname(yolo_anno_path), exist_ok=True)
    with open(yolo_anno_path, 'w') as yolo_writer:
        for data in yolo_format:
            yolo_writer.write(f"{' '.join(list(map(str, data)))}\n")
    
    print(f"Converted {image_path} to {yolo_anno_path}")
    

In [136]:
def generate_train_val_test(src_path, train_path, val_path, test_path, split_ratio, seed=None):
    if not os.path.exists(train_path):
        os.makedirs(train_path, exist_ok=True)
    if not os.path.exists(val_path):
        os.makedirs(val_path, exist_ok=True)
    if not os.path.exists(test_path):
        os.makedirs(test_path, exist_ok=True)

    if os.listdir(train_path) != []:
        shutil.rmtree(train_path)
        print(f"Removed existing train path: {train_path}")
    if os.listdir(val_path) != []:
        shutil.rmtree(val_path)
        print(f"Removed existing val path: {val_path}")
    if os.listdir(test_path) != []:
        shutil.rmtree(test_path)
        print(f"Removed existing test path: {test_path}")
    
    if seed != None:
        random.seed(seed)

    file_lists = []
    for (root, _, file_names) in os.walk(src_path):
        if file_names == []:
            continue

        sub_folder_name = os.path.basename(root)

        file_path_list = list(map(lambda path: os.path.join(sub_folder_name, path), file_names))

        random.shuffle(file_path_list)
        file_lists.append(file_path_list)

    for category_specific_list in file_lists:
        for file_name in category_specific_list:
            random_num = random.random()

            if random_num < split_ratio[0]:
                move_path = os.path.join(train_path, os.path.dirname(file_name))
            elif random_num < sum(split_ratio[:2]):
                move_path = os.path.join(val_path, os.path.dirname(file_name)) 
            else:
                move_path = os.path.join(test_path, os.path.dirname(file_name))

            os.makedirs(move_path, exist_ok=True)            
            original_path = os.path.join(src_path, file_name)
            new_name = os.path.join(move_path, os.path.basename(file_name))
            shutil.copy(original_path, move_path)

            pascal_to_yolo(new_name)
            
            print(original_path)
            print(new_name)


generate_train_val_test(
    src_path='data/img_dataset',
    train_path='data/train_val/images/train',
    val_path='data/train_val/images/val',
    test_path='data/test_data/images',
    split_ratio=train_val_test
    )

Removed existing train path: data/train_val/images/train
Removed existing val path: data/train_val/images/val
Removed existing test path: data/test_data/images
[0, 232, 395, 81, 154]
Converted data/train_val/images/train/People/2015_06330.jpg to data/train_val/labels/train/People/2015_06330.txt
data/img_dataset/People/2015_06330.jpg
data/train_val/images/train/People/2015_06330.jpg
[0, 243, 48, 95, 237]
[4, 13, 150, 69, 130]
[4, 91, 166, 74, 96]
[4, 171, 184, 49, 63]
[4, 418, 189, 27, 48]
[4, 439, 187, 38, 60]
[4, 471, 175, 45, 74]
Converted data/train_val/images/train/People/2015_06379.jpg to data/train_val/labels/train/People/2015_06379.txt
data/img_dataset/People/2015_06379.jpg
data/train_val/images/train/People/2015_06379.jpg
[0, 307, 223, 44, 117]
[0, 348, 228, 50, 108]
[10, 459, 214, 178, 201]
Converted data/test_data/images/People/2015_06315.jpg to data/test_data/labels/People/2015_06315.txt
data/img_dataset/People/2015_06315.jpg
data/test_data/images/People/2015_06315.jpg
[0, 3



[2, 845, 310, 76, 75]
[2, 787, 363, 71, 61]
[0, 463, 70, 284, 397]
[0, 756, 140, 128, 162]
[0, 925, 53, 270, 473]
Converted data/train_val/images/train/Cup/2015_04918.png to data/train_val/labels/train/Cup/2015_04918.txt
data/img_dataset/Cup/2015_04918.png
data/train_val/images/train/Cup/2015_04918.png
[2, 142, 161, 342, 264]
Converted data/test_data/images/Cup/2015_04464.jpg to data/test_data/labels/Cup/2015_04464.txt
data/img_dataset/Cup/2015_04464.jpg
data/test_data/images/Cup/2015_04464.jpg
[2, 11, 175, 694, 708]
Converted data/train_val/images/train/Cup/2015_04780.jpg to data/train_val/labels/train/Cup/2015_04780.txt
data/img_dataset/Cup/2015_04780.jpg
data/train_val/images/train/Cup/2015_04780.jpg
[2, 36, 205, 41, 44]
[0, 166, 4, 189, 272]
Converted data/train_val/images/train/Cup/2015_04922.png to data/train_val/labels/train/Cup/2015_04922.txt
data/img_dataset/Cup/2015_04922.png
data/train_val/images/train/Cup/2015_04922.png
[2, 8, 133, 185, 431]
[2, 122, 240, 132, 264]
[2, 208,



[5, 223, 311, 341, 166]
[5, 493, 252, 456, 245]
Converted data/train_val/images/train/Boat/2015_01050.jpg to data/train_val/labels/train/Boat/2015_01050.txt
data/img_dataset/Boat/2015_01050.jpg
data/train_val/images/train/Boat/2015_01050.jpg
[5, 414, 162, 86, 73]
Converted data/train_val/images/val/Boat/2015_00946.jpg to data/train_val/labels/val/Boat/2015_00946.txt
data/img_dataset/Boat/2015_00946.jpg
data/train_val/images/val/Boat/2015_00946.jpg
[6, 344, 193, 131, 118]
Converted data/train_val/images/val/Table/2015_07007.jpg to data/train_val/labels/val/Table/2015_07007.txt
data/img_dataset/Table/2015_07007.jpg
data/train_val/images/val/Table/2015_07007.jpg
[4, 10, 156, 300, 363]
[4, 113, 134, 145, 284]
[4, 177, 102, 92, 215]
[4, 234, 45, 128, 165]
[4, 526, 37, 111, 182]
[4, 597, 75, 117, 263]
[4, 635, 129, 171, 276]
[4, 602, 145, 294, 369]
[6, 182, 70, 547, 452]
[6, 727, 107, 171, 57]
[6, 654, 84, 85, 22]
Converted data/train_val/images/train/Table/2015_07248.jpg to data/train_val/l



[6, 277, 399, 164, 120]
[6, 1000, 479, 273, 234]
[6, 636, 513, 496, 200]
[0, 603, 65, 183, 633]
[4, 412, 292, 236, 283]
[4, 9, 350, 544, 361]
Converted data/train_val/images/train/Table/2015_07347.png to data/train_val/labels/train/Table/2015_07347.txt
data/img_dataset/Table/2015_07347.png
data/train_val/images/train/Table/2015_07347.png
[6, 26, 577, 340, 231]
[4, 186, 578, 280, 233]
[4, 468, 663, 182, 243]
Converted data/train_val/images/train/Table/2015_06965.jpg to data/train_val/labels/train/Table/2015_06965.txt
data/img_dataset/Table/2015_06965.jpg
data/train_val/images/train/Table/2015_06965.jpg
[4, 135, 144, 83, 124]
[4, 42, 150, 85, 140]
[2, 114, 131, 18, 20]
[6, 6, 141, 243, 117]
[6, 180, 295, 453, 181]
[6, 302, 127, 103, 103]
Converted data/train_val/images/train/Table/2015_07113.jpg to data/train_val/labels/train/Table/2015_07113.txt
data/img_dataset/Table/2015_07113.jpg
data/train_val/images/train/Table/2015_07113.jpg
[6, 10, 130, 423, 201]
Converted data/train_val/images/t



[7, 976, 1053, 717, 471]
[7, 2223, 1115, 648, 398]
[7, 1572, 1119, 243, 156]
[7, 1811, 1129, 464, 246]
Converted data/train_val/images/train/Car/2015_02668.jpg to data/train_val/labels/train/Car/2015_02668.txt
data/img_dataset/Car/2015_02668.jpg
data/train_val/images/train/Car/2015_02668.jpg
[7, 20, 22, 3204, 1505]
Converted data/train_val/images/train/Car/2015_02687.jpg to data/train_val/labels/train/Car/2015_02687.txt
data/img_dataset/Car/2015_02687.jpg
data/train_val/images/train/Car/2015_02687.jpg
[7, 392, 236, 142, 92]
Converted data/train_val/images/train/Car/2015_02463.jpg to data/train_val/labels/train/Car/2015_02463.txt
data/img_dataset/Car/2015_02463.jpg
data/train_val/images/train/Car/2015_02463.jpg
[7, 294, 326, 88, 79]
[7, 363, 340, 124, 92]
[7, 115, 392, 152, 158]
[7, 2, 350, 162, 199]
Converted data/train_val/images/train/Car/2015_02519.jpg to data/train_val/labels/train/Car/2015_02519.txt
data/img_dataset/Car/2015_02519.jpg
data/train_val/images/train/Car/2015_02519.jpg



[7, 553, 1297, 1127, 471]
[7, 693, 1729, 971, 406]
Converted data/train_val/images/val/Car/2015_02633.jpg to data/train_val/labels/val/Car/2015_02633.txt
data/img_dataset/Car/2015_02633.jpg
data/train_val/images/val/Car/2015_02633.jpg
[7, 44, 883, 2495, 1344]
[7, 2625, 959, 617, 516]
Converted data/train_val/images/train/Car/2015_02656.jpg to data/train_val/labels/train/Car/2015_02656.txt
data/img_dataset/Car/2015_02656.jpg
data/train_val/images/train/Car/2015_02656.jpg
[7, 519, 328, 113, 143]
Converted data/train_val/images/train/Car/2015_02573.jpg to data/train_val/labels/train/Car/2015_02573.txt
data/img_dataset/Car/2015_02573.jpg
data/train_val/images/train/Car/2015_02573.jpg
[7, 131, 68, 103, 91]
[7, 169, 85, 311, 215]
[0, 221, 102, 40, 42]
Converted data/train_val/images/train/Car/2015_02484.jpg to data/train_val/labels/train/Car/2015_02484.txt
data/img_dataset/Car/2015_02484.jpg
data/train_val/images/train/Car/2015_02484.jpg
[7, 276, 380, 65, 51]
[7, 212, 379, 50, 40]
[7, 89, 38



[7, 485, 563, 100, 63]
[7, 556, 573, 89, 89]
[7, 617, 568, 133, 115]
[7, 706, 582, 210, 140]
Converted data/train_val/images/train/Car/2015_02801.jpg to data/train_val/labels/train/Car/2015_02801.txt
data/img_dataset/Car/2015_02801.jpg
data/train_val/images/train/Car/2015_02801.jpg
[7, 191, 306, 882, 426]
Converted data/train_val/images/train/Car/2015_02891.jpg to data/train_val/labels/train/Car/2015_02891.txt
data/img_dataset/Car/2015_02891.jpg
data/train_val/images/train/Car/2015_02891.jpg
[7, 646, 279, 355, 173]
Converted data/train_val/images/train/Car/2015_02895.jpg to data/train_val/labels/train/Car/2015_02895.txt
data/img_dataset/Car/2015_02895.jpg
data/train_val/images/train/Car/2015_02895.jpg
[7, 760, 435, 91, 67]
Converted data/train_val/images/train/Car/2015_02520.jpg to data/train_val/labels/train/Car/2015_02520.txt
data/img_dataset/Car/2015_02520.jpg
data/train_val/images/train/Car/2015_02520.jpg
[7, 20, 350, 708, 374]
[7, 6, 378, 133, 140]
[4, 727, 398, 157, 55]
Converted



[8, 1441, 1720, 309, 791]
Converted data/train_val/images/train/Bottle/2015_01732.jpg to data/train_val/labels/train/Bottle/2015_01732.txt
data/img_dataset/Bottle/2015_01732.jpg
data/train_val/images/train/Bottle/2015_01732.jpg
[8, 203, 68, 123, 352]
[8, 327, 88, 71, 229]
[6, 5, 243, 553, 199]
Converted data/test_data/images/Bottle/2015_01787.jpg to data/test_data/labels/Bottle/2015_01787.txt
data/img_dataset/Bottle/2015_01787.jpg
data/test_data/images/Bottle/2015_01787.jpg
[8, 1182, 425, 325, 1084]
[8, 1432, 667, 545, 1266]
Converted data/train_val/images/train/Bottle/2015_01718.jpg to data/train_val/labels/train/Bottle/2015_01718.txt
data/img_dataset/Bottle/2015_01718.jpg
data/train_val/images/train/Bottle/2015_01718.jpg
[8, 3, 332, 37, 119]
[8, 69, 278, 51, 116]
[8, 173, 256, 42, 104]
[8, 386, 283, 36, 115]
[8, 479, 339, 52, 109]
[8, 531, 335, 48, 132]
[8, 266, 131, 31, 57]
[8, 208, 368, 52, 109]
[2, 283, 278, 33, 59]
[6, 10, 303, 581, 174]
[0, 3, 51, 189, 280]
[0, 250, 90, 164, 230

In [137]:
def create_data_yaml(path='./'):
    data = {
        'path': path,
        'train': './data/train_data',
        'val': './data/val_data',
        'nc': len(class_dict),
        'names': list(class_dict.keys()),
    }

    with open('data.yaml', 'w') as f:
        yaml.dump(data, f, default_flow_style=False)

# Example usage
create_data_yaml()

In [138]:
with open("data.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg)

{'names': ['People', 'Motorbike', 'Cup', 'Bicycle', 'Chair', 'Boat', 'Table', 'Car', 'Bottle', 'Bus', 'Cat', 'Dog'], 'nc': 12, 'path': './', 'train': './data/train_data', 'val': './data/val_data'}


In [139]:
model = YOLO('./snapshots/yolov8n.pt')

print(model.info())

YOLOv8n summary: 129 layers, 3,157,200 parameters, 0 gradients, 8.9 GFLOPs
(129, 3157200, 0, 8.8575488)


In [140]:
config_settings = {
    # Output Config
    "project": "./YOLOv8_outputs/runs/train",
    "name": "yolov8_exp",

    # Train Config
    "epochs": 1000,
    "patience": 10,
    "batch": 15,
    "imgsz": 640,
    "save": True,
    "save_period": 1,
    "cache": False,
    "device": 0,
    "workers": 8,
    "project": "runs/train",
    "name": "yolov8_exp",
    "exist_ok": True,
    "pretrained": True,
    "optimizer": "auto",
    "deterministic": False,
    "classes":list(class_dict.keys()),
    "rect": False,
    "resume": True,
    "visualize": True,

    # "lr0": 0.001,
    # "lrf": 0.01,
    # "momentum": 0.937,
    # "weight_decay": 0.0005,
    # "warmup_epochs": 3.0,
    # "warmup_momentum": 0.8,
    # "warmup_bias_lr": 0.1,
    # "box": 7.5,
    # "cls": 0.5,
    # "dfl": 1.5,

    # Augmentation Config
    # "hsv_h": 0.015,
    # "hsv_s": 0.7,
    # "hsv_v": 0.4,
    # "degrees": 0.0,
    # "translate": 0.1,
    # "scale": 0.5,
    # "shear": 0.0,
    # "perspective": 0.0,
    # "flipud": 0.0,
    # "fliplr": 0.5,
    # "mosaic": 1.0,
    # "mixup": 0.0,
    # "copy_paste": 0.0,
    # "dropout": 0.0,

}


In [143]:
model.train(
    data="data.yaml",
    **config_settings
)


Ultralytics 8.3.107 🚀 Python-3.10.17 torch-2.6.0+cu126 CUDA:0 (NVIDIA GeForce GTX 1080, 8106MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=snapshots/yolov8n.pt, data=data.yaml, epochs=500, time=None, patience=50, batch=15, imgsz=640, save=True, save_period=-1, cache=disk, device=0, workers=8, project=YOLOv8, name=yolov8n3, exist_ok=False, pretrained=False, optimizer=SGD, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=snapshots/yolov8n.pt, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=False, val=True, split=val, save_json=False, conf=0.001, iou=0.7, max_det=300, half=True, dnn=False, plots=True, source=ultralytics/assets/, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_lab

RuntimeError: Dataset 'data.yaml' error ❌ 
Dataset 'data.yaml' images not found ⚠️, missing path '/home/yc-family/Documents/code/mlgp/data/val_data'
Note dataset download directory is '/home/yc-family/Documents/code/mlgp_new/datasets'. You can update this in '/home/yc-family/.config/Ultralytics/settings.json'