## Convert the dataset annotations to YOLO format

In [None]:
import os
import random
import shutil
import cv2

# Locations of the raw training data and of the converted output.
image_folder = 'train/train_image'            # directory holding the training images
annotation_folder = 'train/train_annotation'  # directory holding the annotation files
output_root = 'Dataset'                       # root directory of the YOLO-format output

# Class name -> YOLO class id; the id is the position of the name in the tuple.
class_map = {
    name: class_id
    for class_id, name in enumerate((
        'Straight_Knife',
        'Folding_Knife',
        'Scissor',
        'Utility_Knife',
        'Multi-tool_Knife',
    ))
}

# Make sure the images/ and labels/ sub-directories exist for both splits.
for split in ('train', 'val'):
    for kind in ('images', 'labels'):
        os.makedirs(os.path.join(output_root, kind, split), exist_ok=True)

# Put the annotation file names into a reproducible random order:
# sort them first, then shuffle with a fixed seed.
annotation_files = os.listdir(annotation_folder)
annotation_files.sort()
random.seed(42)
random.shuffle(annotation_files)

# The first 80% of the shuffled names form the training set, the rest validation.
split_idx = int(0.8 * len(annotation_files))
train_files, val_files = annotation_files[:split_idx], annotation_files[split_idx:]

def process_split(split_files, split_name):
    """Convert the annotation files of one split to YOLO labels and copy their images.

    Each annotation line is expected to hold six whitespace-separated fields:
    ``img_name cls_name x1 y1 x2 y2`` (box corners in pixels). A line is
    skipped when it has a different field count, an unknown class name,
    non-numeric coordinates, or refers to a missing/unreadable image.

    For every annotation file that yields a usable image, the label file
    ``<output_root>/labels/<split_name>/<annotation base name>.txt`` is written
    and the image is copied to ``<output_root>/images/<split_name>/``.
    Annotation files without any usable line are skipped entirely, so labels
    and images always stay paired.

    Args:
        split_files: Annotation file names, relative to ``annotation_folder``.
        split_name: Name of the output sub-directory, e.g. ``'train'``.
    """
    for anno_file in split_files:
        with open(os.path.join(annotation_folder, anno_file), 'r') as f:
            lines = f.readlines()

        yolo_labels = []
        # Reset per annotation file so that a file without usable lines can
        # never fall back to the image of the previously processed file.
        img_name = None
        img_path = None
        # Image name -> (height, width), or None when missing/unreadable, so
        # every image is decoded at most once per annotation file.
        sizes = {}

        for line in lines:
            parts = line.strip().split()
            if len(parts) != 6:
                continue

            line_img_name, cls_name, *coords = parts
            class_id = class_map.get(cls_name, -1)
            if class_id == -1:
                continue

            try:
                # float() accepts both "12" and "12.5"; for integer input the
                # normalised values are the same as with int().
                x1, y1, x2, y2 = map(float, coords)
            except ValueError:
                print(f"skipping line with non-numeric coordinates in {anno_file}: {line.strip()}")
                continue

            if line_img_name not in sizes:
                candidate = os.path.join(image_folder, line_img_name)
                # cv2.imread returns None (it does not raise) when the file
                # cannot be decoded, so check before touching .shape.
                img = cv2.imread(candidate) if os.path.exists(candidate) else None
                if img is None:
                    print(f"skipping missing or unreadable image: {candidate}")
                sizes[line_img_name] = None if img is None else img.shape[:2]
            if sizes[line_img_name] is None:
                continue

            h, w = sizes[line_img_name]
            img_name = line_img_name
            img_path = os.path.join(image_folder, img_name)

            # Corner box -> normalised (x_center, y_center, width, height).
            x_center = (x1 + x2) / 2 / w
            y_center = (y1 + y2) / 2 / h
            box_w = (x2 - x1) / w
            box_h = (y2 - y1) / h

            yolo_labels.append(f"{class_id} {x_center:.6f} {y_center:.6f} {box_w:.6f} {box_h:.6f}")

        if img_path is None:
            # Without a usable line the image name is unknown, so nothing can
            # be copied; skip the label too rather than leave it unpaired.
            print(f"skipping {anno_file}: no usable annotation line")
            continue

        # Save the YOLO labels under the annotation file's base name.
        base_name = os.path.splitext(anno_file)[0]
        label_path = os.path.join(output_root, 'labels', split_name, f"{base_name}.txt")
        with open(label_path, 'w') as f:
            f.write('\n'.join(yolo_labels))

        # Copy the image the labels refer to.
        shutil.copy2(img_path, os.path.join(output_root, 'images', split_name, img_name))

# Convert the training split first, then the validation split.
for split_name, split_files in (('train', train_files), ('val', val_files)):
    process_split(split_files, split_name)


In [None]:
import os
import shutil
import cv2

# Class name -> YOLO class id; the id is the position of the name in the tuple.
class_map = {
    name: class_id
    for class_id, name in enumerate((
        'Straight_Knife',
        'Folding_Knife',
        'Scissor',
        'Utility_Knife',
        'Multi-tool_Knife',
    ))
}

# Locations of the raw test data and of the converted output.
image_folder = 'test/test_image'
annotation_folder = 'test/test_annotation'
output_root = 'Dataset'

# Text files listing the image IDs of each test split, one ID per line.
test_split_txts = [
    'test/test_knife.txt',
    'test/test_knife-1.txt',
    'test/test_knife-2.txt',
    'test/test_knife-3.txt',
]

def process_split(split_txt_path, split_name):
    """Build one YOLO-format test split from a text file of image IDs.

    For every ID listed in ``split_txt_path`` the image ``<id>.jpg`` and the
    annotation ``<id>.txt`` are looked up in ``image_folder`` and
    ``annotation_folder``. Annotation lines of the form
    ``img_name cls_name x1 y1 x2 y2`` are converted to normalised YOLO boxes,
    written to ``<output_root>/labels/<split_name>/<id>.txt``, and the image is
    copied to ``<output_root>/images/<split_name>/``. Problems (missing files,
    unreadable images, malformed lines, mismatching image names, unknown
    classes) are reported with ``print`` and the offending item is skipped.

    Args:
        split_txt_path: Path of the text file with one image ID per line.
        split_name: Name of the output sub-directory for this split.
    """
    img_out_dir = os.path.join(output_root, 'images', split_name)
    lbl_out_dir = os.path.join(output_root, 'labels', split_name)
    for out_dir in (img_out_dir, lbl_out_dir):
        os.makedirs(out_dir, exist_ok=True)

    # Empty both output directories before anything is written.
    for out_dir in (img_out_dir, lbl_out_dir):
        for entry in os.listdir(out_dir):
            os.remove(os.path.join(out_dir, entry))

    # Every non-blank line of the split file is one image ID.
    image_ids = []
    with open(split_txt_path, 'r') as split_file:
        for raw in split_file:
            stripped = raw.strip()
            if stripped:
                image_ids.append(stripped)

    for img_id in image_ids:
        img_name = img_id + '.jpg'
        img_path = os.path.join(image_folder, img_name)
        anno_path = os.path.join(annotation_folder, img_id + '.txt')

        # Both the image and its annotation file must be present.
        if not (os.path.exists(img_path) and os.path.exists(anno_path)):
            print(f"❗ missing  file: {img_path} or {anno_path}")
            continue

        # The image size is needed to normalise the box coordinates.
        img = cv2.imread(img_path)
        if img is None:
            print(f"❗ fail to read the file: {img_path}")
            continue
        height, width = img.shape[:2]

        yolo_labels = []
        with open(anno_path, 'r') as anno:
            for raw in anno:
                fields = raw.strip().split()
                if len(fields) != 6:
                    print(f"⚠️ format error: {anno_path} - {raw.strip()}")
                    continue

                img_name_read, cls_name, *corners = fields
                # The line must refer to the image currently being processed.
                if img_name_read != img_name:
                    print(f"⚠️ The {img_name_read} is not algin with {img_name} ")
                    continue

                class_id = class_map.get(cls_name, -1)
                if class_id == -1:
                    print(f"⚠️ unknown : {cls_name}")
                    continue

                # Corner box -> normalised (x_center, y_center, width, height).
                x1, y1, x2, y2 = (float(value) for value in corners)
                x_center = (x1 + x2) / 2 / width
                y_center = (y1 + y2) / 2 / height
                box_w = (x2 - x1) / width
                box_h = (y2 - y1) / height

                yolo_labels.append(f"{class_id} {x_center:.6f} {y_center:.6f} {box_w:.6f} {box_h:.6f}")

        # Write the label file, then copy the image next to it.
        with open(os.path.join(lbl_out_dir, f"{img_id}.txt"), 'w') as label_file:
            label_file.write('\n'.join(yolo_labels))

        shutil.copy2(img_path, os.path.join(img_out_dir, img_name))

# Convert every test split list; list number N goes to the "test_N" output directories.
for split_no, split_txt in enumerate(test_split_txts):
    process_split(split_txt, f"test_{split_no}")

print("All splits processed successfully!")
