In [None]:
import os
import re
import cv2
import yaml
import shutil
import random
import torch
from tqdm import tqdm
from pathlib import Path
from ultralytics import YOLO


In [None]:
torch.cuda.set_device(0) # Set to your desired GPU number
IMG_PATH = Path(r'./data/img_dataset')
ANNOTATION_PATH = Path(r'./data/annotation')
train_val_test = (0.8,0.1,0.1)  # (train, val, test) fractions used when splitting the dataset

# Map every category folder under IMG_PATH to a class index.
# Directory listings come back in arbitrary order, so the names are sorted to keep the
# name -> index mapping identical on every machine and every run. Non-directory entries
# (e.g. OS metadata files) are ignored so they cannot become bogus classes.
class_dict = {
    category_name: idx
    for idx, category_name in enumerate(
        sorted(entry.name for entry in IMG_PATH.iterdir() if entry.is_dir())
    )
}
print(class_dict)

In [None]:
# def clean_file_names():
#     DATA_PATH = r'./data'
#     for root, _, file_names in os.walk(DATA_PATH):
#         if file_names == []:
#             continue
        
#         for file_name in file_names:
#             new_file_name = file_name.lower()
#             old_file_path = os.path.join(root, file_name)
#             file_path = os.path.join(root, new_file_name)
#             os.rename(old_file_path, file_path)

# clean_file_names()

In [None]:
def pascal_to_yolo(image_path):
    """Write the YOLO label file that belongs to one image.

    The source annotation is read from
    ``ANNOTATION_PATH/<category>/<image file name>.txt`` where ``<category>`` is the name of
    the folder that contains the image. The first line of that file is skipped; every
    following non-blank line is read as ``<class name> <x> <y> <width> <height> ...`` with
    integer pixel values, ``x``/``y`` being used as the left/top edge of the box. Extra
    fields after the fifth are ignored.

    Each box is written as ``<class index> <x_center> <y_center> <width> <height>``, all
    divided by the image size, to the path obtained by replacing ``images`` with ``labels``
    in the image's directory and using the image's base name with a ``.txt`` extension.

    Args:
        image_path: Path (str or path-like) of the image to convert.

    Raises:
        FileNotFoundError: If the image cannot be read or the annotation file is missing.
        ValueError: If an annotation line has fewer than five fields or a non-integer
            coordinate.
        KeyError: If an annotation names a class that is not in ``class_dict``.
    """
    image_path = os.fspath(image_path)  # cv2.imread needs a plain string
    image_name = os.path.basename(image_path)
    image_dir = os.path.dirname(image_path).replace("\\", "/")
    file_title, _ = os.path.splitext(image_name)
    yolo_anno_path = image_dir.replace("images", "labels") + f"/{file_title}.txt"

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(image_path)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_height, img_width = img.shape[:2]

    sub_category = image_dir.split("/")[-1]
    original_annotation_path = os.path.join(ANNOTATION_PATH, sub_category, image_name + ".txt")
    with open(original_annotation_path, 'r') as anno_file:
        annotation_lines = anno_file.readlines()[1:]  # first line is not a box

    yolo_format = []
    for line_number, raw_line in enumerate(annotation_lines, start=2):
        fields = raw_line.split()  # tolerant of repeated spaces / tabs
        if not fields:
            continue  # blank line, e.g. a trailing newline at the end of the file
        if len(fields) < 5:
            raise ValueError(
                f"{original_annotation_path}, line {line_number}: expected "
                f"'<class> <x> <y> <width> <height>', got {raw_line.strip()!r}"
            )

        class_category = class_dict[fields[0]]
        left, top, box_width, box_height = map(int, fields[1:5])

        x_ctr = (left + box_width/2) / img_width
        y_ctr = (top + box_height/2) / img_height
        width = box_width / img_width
        height = box_height / img_height
        yolo_format.append([class_category, x_ctr, y_ctr, width, height])

    os.makedirs(os.path.dirname(yolo_anno_path), exist_ok=True)
    with open(yolo_anno_path, 'w') as yolo_writer:
        for data in yolo_format:
            yolo_writer.write(f"{' '.join(map(str, data))}\n")

    print(f"Converted {image_path} to {yolo_anno_path}")
    

In [None]:
# testimg = r'C:\Users\Admin\Documents\MLGP\mlgp_full\data\img_dataset\Dog\2015_05337.jpeg'
# print(cv2.imread(testimg).shape)

In [None]:
def generate_train_val_test(src_path, train_path, val_path, test_path, split_ratio, seed=None):
    """Copy every file under ``src_path`` into a train, val or test folder and write its label.

    Files are expected in sub-folders of ``src_path``; the name of the folder that directly
    contains a file is recreated below the chosen destination. Each file is assigned by an
    independent random draw, so the resulting proportions only approximate ``split_ratio``.
    After a file is copied, ``pascal_to_yolo`` is called on the copy.

    Args:
        src_path: Folder that is walked for input files.
        train_path: Destination root for the train split.
        val_path: Destination root for the val split.
        test_path: Destination root for the test split.
        split_ratio: Sequence whose first item is the probability of "train" and whose
            second item is the probability of "val"; every other draw goes to "test".
        seed: Optional seed passed to ``random.seed``. Together with the sorted traversal
            below it makes the assignment reproducible.
    """
    # Start from empty destination folders so files of an earlier split cannot linger.
    # Each folder is checked and recreated in turn, which also works when one
    # destination is nested inside another.
    for split_name, split_dir in (("train", train_path), ("val", val_path), ("test", test_path)):
        if os.path.isdir(split_dir) and os.listdir(split_dir):
            shutil.rmtree(split_dir)
            print(f"Removed existing {split_name} path: {split_dir}")
        os.makedirs(split_dir, exist_ok=True)

    if seed is not None:
        random.seed(seed)

    # Collect all inputs before copying anything. Directory listings are in arbitrary
    # order, so both folders and files are sorted; otherwise the same seed could still
    # produce a different split on another machine.
    relative_files = []
    for root, dir_names, file_names in os.walk(src_path):
        dir_names.sort()  # in place, so os.walk descends in this order
        sub_folder_name = os.path.basename(root)
        relative_files.extend(os.path.join(sub_folder_name, name) for name in sorted(file_names))

    train_cutoff = split_ratio[0]
    val_cutoff = sum(split_ratio[:2])

    for file_name in relative_files:
        random_num = random.random()
        if random_num < train_cutoff:
            destination_root = train_path
        elif random_num < val_cutoff:
            destination_root = val_path
        else:
            destination_root = test_path

        move_path = os.path.join(destination_root, os.path.dirname(file_name))
        os.makedirs(move_path, exist_ok=True)
        original_path = os.path.join(src_path, file_name)
        new_name = os.path.join(move_path, os.path.basename(file_name))
        shutil.copy(original_path, move_path)

        pascal_to_yolo(new_name)

        print(original_path)
        print(new_name)


# generate_train_val_test(
#     src_path='data/img_dataset',
#     train_path='data/train_val/images/train',
#     val_path='data/train_val/images/val',
#     test_path='data/test_data/images',
#     split_ratio=train_val_test
#     )

In [None]:
# def create_data_yaml(path='./'):
#     data = {
#         'path': path,
#         'train': './train_val/images/train',
#         'val': './train_val/images/val',
#         'nc': len(class_dict),
#         'names': list(class_dict.keys()),
#     }

#     with open('data.yaml', 'w') as f:
#         yaml.dump(data, f, default_flow_style=False)

# # Example usage
# create_data_yaml()

In [None]:
# check for different file names for images and labels
# Forward slashes are accepted on Windows and POSIX alike; a backslash is only a path
# separator on Windows, so it would make these paths unusable elsewhere.
root_data = './data'
test_data = os.path.join(root_data, "test_data")
train_val = os.path.join(root_data, "train_val")


def check_file_names(folder): # test_data or train_val
    """Print every file under ``folder/images`` that has no matching label file.

    For a file at ``folder/images/<sub-path>/<name>.<ext>`` the label is expected at
    ``folder/labels/<sub-path>/<name>.txt``. The whole ``images`` tree is walked, so the
    check covers layouts with ``train``/``val`` sub-folders as well as layouts where the
    category folders sit directly below ``images``.

    Args:
        folder: Dataset root that holds the ``images`` and ``labels`` folders.
    """
    img_folder = os.path.join(folder, "images")
    label_folder = os.path.join(folder, "labels")

    for root, dir_names, file_names in os.walk(img_folder):
        dir_names.sort()  # in place: gives a stable traversal and report order
        relative_dir = os.path.relpath(root, img_folder)
        for image_name in sorted(file_names):
            file_name = os.path.splitext(image_name)[0]
            # normpath drops the "." that relpath yields for the top-level folder
            label_name = os.path.normpath(
                os.path.join(label_folder, relative_dir, file_name + ".txt")
            )
            if not os.path.exists(label_name):
                print(f"Missing label file for {file_name} in {label_name}")

# Run the missing-label check on both dataset roots, train/val first.
for dataset_root in (train_val, test_data):
    check_file_names(dataset_root)

In [None]:
# clamp label coordinates that fall outside [0, 1] and report malformed label lines
def check_label_files(folder):
    """Validate every label file under ``folder/labels`` and repair out-of-range values.

    Each line is expected to hold five numbers: a class id followed by four box values.
    Box values above 1 are set to 1.0 and values below 0 are set to 0.0; the offending
    line is printed. Lines that do not consist of exactly five numbers are printed and
    left untouched, blank lines are kept as they are, and files whose name contains
    ``cache`` are skipped. A file is only rewritten when its content actually changes.

    NOTE(review): every value is clamped on its own; the box is not recomputed from
    clipped corners, so a clamped centre combined with an unchanged width may still
    reach past the image border.

    Args:
        folder: Dataset root that holds the ``labels`` folder.
    """
    label_folder = os.path.join(folder, "labels")
    print(label_folder)
    for root, _, files in os.walk(label_folder):
        for file in files:
            if "cache" in file:
                continue

            label_path = os.path.join(root, file)
            with open(label_path, 'r') as f:
                lines = f.readlines()

            new_lines = []
            for line in lines:
                tokens = line.split()
                if not tokens:
                    new_lines.append(line)
                    continue

                try:
                    values = [float(token) for token in tokens]
                except ValueError:
                    values = None
                # Validate the shape before indexing into the values.
                if values is None or len(values) != 5:
                    print(f"Invalid label format in {label_path}: {line.strip()}")
                    new_lines.append(line)
                    continue

                box = [min(1.0, max(0.0, value)) for value in values[1:]]
                if box != values[1:]:
                    print(f"Invalid bounding box in {label_path}: {line.strip()}")

                # Keep the class id an integer token ("3", not "3.0").
                class_id = values[0]
                class_text = str(int(class_id)) if class_id.is_integer() else str(class_id)
                new_lines.append(" ".join([class_text, *map(str, box)]) + "\n")

            # The read handle is closed by now; only touch files that need a change.
            if new_lines != lines:
                with open(label_path, 'w') as f:
                    f.writelines(new_lines)
                
# Validate the label files of both dataset roots, train/val first.
for dataset_root in (train_val, test_data):
    check_label_files(dataset_root)

In [None]:
import preprocessors.zero_dce.lowlight_test as lowlight_test
import torchvision

# Run lowlight_test.lowlight on every image of the train/val split and store the result,
# together with a copy of the image's label file, in a parallel "zero_dce_train_val" tree.
# NOTE(review): presumably this is Zero-DCE low-light enhancement - confirm in the
# preprocessors package.
new_path = r'data/zero_dce_train_val/images'
new_label_root = new_path.replace("images", "labels")

for (root, dirs, file_names) in os.walk(r'./data/train_val/images'):
    source_label_dir = root.replace("images", "labels")

    for file_name in file_names:
        if "cache" in file_name:
            continue

        source_image = os.path.join(root, file_name).replace("\\", "/")
        # The two innermost folders are <train|val>/<category>.
        split_name, category = os.path.dirname(source_image).split("/")[-2:]
        relative_dir = os.path.join(split_name, category)

        enhanced_path = os.path.join(new_path, relative_dir, file_name)
        os.makedirs(os.path.dirname(enhanced_path), exist_ok=True)
        enhanced_img = lowlight_test.lowlight(source_image, save_file=False)
        torchvision.utils.save_image(enhanced_img, enhanced_path)

        # The label keeps the image's base name with a .txt extension.
        label_file = os.path.splitext(file_name)[0] + '.txt'
        copied_label = os.path.join(new_label_root, relative_dir, label_file)
        os.makedirs(os.path.dirname(copied_label), exist_ok=True)
        shutil.copy(os.path.join(source_label_dir, label_file), copied_label)

In [None]:
# Create the YOLO model from the 'yolov8n.yaml' config and move it to the CUDA device.
# NOTE(review): a .yaml argument presumably builds the network without pretrained
# weights - confirm against the Ultralytics documentation.
model = YOLO('yolov8n.yaml')
model.to('cuda')

# Print whatever model.info() returns.
print(model.info())

In [None]:
# Keyword arguments that are unpacked into model.train().
# NOTE(review): 'resume': True is combined with a model created from a .yaml config;
# confirm how the installed Ultralytics version treats that when no checkpoint exists.
train_config = {
    'data': 'data2.yaml',
    'epochs': 100,
    'patience': 10,
    'batch': 16,
    'imgsz': 640,
    'save': True,
    'save_period': 1,
    'project': r'../model_runs',
    'name': 'yolov8n_custom',
    'exist_ok': False,
    'seed': 0,
    'resume': True,
    'lr0': 0.01,  # 0.1x
    'lrf': 0.001,
    'dropout': 0.0,
    'val': True,
    'plots':True,
}

# Settings for a validation run.
# NOTE(review): no call using val_config is visible in this part of the notebook.
val_config = {
    'data': 'data2.yaml',
    'batch': 16,
    'imgsz': 640,
    'conf': 0.25,
    'iou': 0.5,
    'max_det': 100,
    'plots': True,
    'split': 'val',
    'project': r'../model_runs',
    'name': 'val'
}

In [None]:
# Start training with the settings collected in train_config.
model.train(**train_config)
