# This script merges several COCO dataset annotation files and creates a new dataset in YOLO format

## Merge COCO with other datasets

In [None]:
from datasetsConverters import mergeCOCOAndConvertToYOLO

# Collect every argument of the merge in one place, then forward them as
# keyword arguments. The annotation paths use Windows-style separators.
merge_settings = {
    "merge_coco_paths": [
        "COCO2017\\train\\_annotations.coco.json",
        "padel\\train\\_annotations.coco.json",
        "FireEx\\train\\_annotations.coco.json",
    ],
    "creation_dst_coco": "./mergedCocoDataset_fire_padel",
    "creation_dst_yolo": "./mergedCocoDataset_fire_padel_yolo",
    "max_images": 540,
    # presumably the train/val/test fractions -- confirm in datasetsConverters
    "splits_perc": [0.8, 0.1, 0.1],
    "zipName": "COCO_82c_34k_v1",
    "random_state": 42,
}

mergeCOCOAndConvertToYOLO(**merge_settings)


## Custom COCO

In [None]:
from datasetsConverters import *

# Value forwarded to CocoSubset as ``max_image_per_category``; it is also
# embedded in the name of the destination folder.
max_image_per_category = 200
srcCocoPath = "COCO2017\\train\\_annotations.coco.json"
dstCocoPath = f"COCO2017_reduced_{max_image_per_category}"

annotations = loadAnnotations(srcCocoPath)

# Replace the loaded COCO2017 entry with the subset built from it.
annotations["COCO2017"] = CocoSubset(
    annotations["COCO2017"],
    max_image_per_category=max_image_per_category,
)

# Write the reduced annotation out as a new COCO dataset.
createMergedCocoDataset(
    newCocoAnnotation=annotations["COCO2017"],
    srcCocoDirs=[srcCocoPath],
    dstCocoDir=dstCocoPath,
)

In [2]:
from datasetsConverters import *
max_image_per_category = 200

# Gather the arguments first, then forward them as keyword arguments.
# The first source is the reduced COCO2017 dataset whose folder name embeds
# ``max_image_per_category``.
merge_settings = {
    "merge_coco_paths": [
        f"COCO2017_reduced_{max_image_per_category}\\train\\_annotations.coco.json",
        "Fire_coco\\train\\_annotations.coco.json",
        "Padel_coco\\train\\_annotations.coco.json",
    ],
    "creation_dst_coco": "./mergedCocoDataset_fire_padel",
    "creation_dst_yolo": "./mergedCocoDataset_fire_padel_yolo",
    "max_images": None,
    "splits_perc": [0.7, 0.2, 0.1],
    "zipName": None,  # previously "COCO_82c_38k_Mosaicv1"
    "random_state": 42,
}

mergeCOCOAndConvertToYOLO(**merge_settings)

Loading COCO2017_reduced_200 annotation file JSON from: COCO2017_reduced_200\train\_annotations.coco.json 
Done.
Loading Fire_coco annotation file JSON from: Fire_coco\train\_annotations.coco.json 
Done.
Loading Padel_coco annotation file JSON from: Padel_coco\train\_annotations.coco.json 
Done.
Starting merge of annotations
Creating the new merged COCO dataset in folder: ./mergedCocoDataset_fire_padel
Creating the new CocoDataset


Coping new Images: 100%|██████████| 17020/17020 [04:51<00:00, 58.32it/s] 


Done

Converting the new COCO dataset Yolo in folder: ./mergedCocoDataset_fire_padel_yolo


Annotations D:\Projects\RoboTO\CocoDatasetEnhanced\mergedCocoDataset_fire_padel\train\_annotations.coco.json: 100%|██████████| 17018/17018 [00:27<00:00, 617.54it/s] 
Coping all images in the yolo dataset folder train:   7%|▋         | 864/11914 [00:26<05:36, 32.85it/s]

01196_jpg.rf.b1e44d7239dd10be6b9b7c17c2336d5a.txt not found


Coping all images in the yolo dataset folder train:  18%|█▊        | 2141/11914 [01:18<04:36, 35.40it/s]

000000581881.txt not found


Coping all images in the yolo dataset folder train: 100%|██████████| 11914/11914 [06:54<00:00, 28.71it/s]
Coping all images in the yolo dataset folder val: 100%|██████████| 3404/3404 [01:54<00:00, 29.78it/s]
Coping all images in the yolo dataset folder test: 100%|██████████| 1702/1702 [01:00<00:00, 28.13it/s]


In [5]:
# Re-run only the COCO -> YOLO conversion on the train folder of the merged
# COCO dataset, without producing a zip archive.
merged_coco_root = "./mergedCocoDataset_fire_padel"
convertCOCOtoYOLOv8(
    merged_coco_root + "/train",
    "./mergedCocoDataset_fire_padel_yolo",
    zipName=None,
    splits_perc=[0.7, 0.2, 0.1],
)

Annotations D:\Projects\RoboTO\CocoDatasetEnhanced\mergedCocoDataset_fire_padel\train\_annotations.coco.json: 100%|██████████| 17018/17018 [00:25<00:00, 663.28it/s] 
Coping all images in the yolo dataset folder train: 100%|██████████| 11914/11914 [03:35<00:00, 55.33it/s]
Coping all images in the yolo dataset folder val: 100%|██████████| 3404/3404 [01:11<00:00, 47.53it/s]
Coping all images in the yolo dataset folder test:  53%|█████▎    | 897/1702 [00:16<00:13, 58.47it/s]

01196_jpg.rf.b1e44d7239dd10be6b9b7c17c2336d5a.txt not found


Coping all images in the yolo dataset folder test:  97%|█████████▋| 1656/1702 [00:32<00:01, 33.69it/s]

000000581881.txt not found


Coping all images in the yolo dataset folder test: 100%|██████████| 1702/1702 [00:35<00:00, 48.47it/s]


In [6]:
def remove_labels_from_folders(label_folders, data_yaml_path, labels_to_remove):
    """Delete every annotation of the given classes from YOLO label files.

    Each ``.txt`` file found directly inside the folders of ``label_folders``
    is read, the lines whose first field (the class index) is listed in
    ``labels_to_remove`` are dropped, and the file is rewritten only when at
    least one line was dropped. The matching entries are then removed from
    the ``names`` mapping of the YAML file at ``data_yaml_path``.

    Args:
        label_folders: Iterable of directories containing label ``.txt`` files.
        data_yaml_path: Path of the dataset YAML file to update.
        labels_to_remove: Iterable of integer class indices to remove.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
            The offending file is left untouched because parsing happens
            before the file is reopened for writing.

    Note:
        Only a dict-style ``names`` entry (index -> name) is edited. A
        list-style ``names`` is left unchanged, since deleting list items
        would shift the index of every following class.
    """
    # Build the lookup once; set membership is O(1) for every label line.
    classes_to_drop = set(labels_to_remove)

    for folder in label_folders:
        print(folder)
        for filename in tqdm.tqdm(os.listdir(folder)):
            if not filename.endswith(".txt"):
                continue

            filepath = os.path.join(folder, filename)
            with open(filepath, 'r') as f:
                lines = f.readlines()

            # Decide what to keep BEFORE reopening the file in 'w' mode:
            # opening for writing truncates it, so a parse error raised
            # afterwards would leave the label file partially written.
            kept_lines = []
            for line in lines:
                # split() with no argument accepts spaces and tabs alike and
                # returns an empty list for blank lines, which are kept as is.
                fields = line.split()
                if fields and int(fields[0]) in classes_to_drop:
                    continue
                kept_lines.append(line)

            # Skip the write entirely when nothing was removed.
            if len(kept_lines) != len(lines):
                with open(filepath, 'w') as f:
                    f.writelines(kept_lines)

    # Update data.yaml
    with open(data_yaml_path, 'r') as f:
        data = yaml.safe_load(f)

    if 'names' in data:
        names = data['names']
        if isinstance(names, dict):
            for label_to_remove in classes_to_drop:
                names.pop(label_to_remove, None)

    with open(data_yaml_path, 'w') as f:
        yaml.dump(data, f)
        
        
# Usage example: strip classes 80 and 82 from every split of the YOLO dataset.
label_paths = [
    f"mergedCocoDataset_fire_padel_yolo/{split}/labels"
    for split in ("test", "train", "val")
]
data_yaml_path = "mergedCocoDataset_fire_padel_yolo/data.yaml"
labels_to_remove = [80, 82]  # class indices to delete

remove_labels_from_folders(
    label_folders=label_paths,
    data_yaml_path=data_yaml_path,
    labels_to_remove=labels_to_remove,
)

# Disabled alternative kept for reference (`label_dirs` and
# `modify_yolo_dataset` are not defined in the cells shown here):
# labels_to_remap = {80: "fire_extinguisher", 81: "padel_rackets"}  # Dictionary of labels to remap
# modify_yolo_dataset(label_dirs, data_yaml_path, labels_to_remove, labels_to_remap)

mergedCocoDataset_fire_padel_yolo/test/labels


100%|██████████| 1700/1700 [00:09<00:00, 185.21it/s]


mergedCocoDataset_fire_padel_yolo/train/labels


100%|██████████| 11914/11914 [01:41<00:00, 117.71it/s]


mergedCocoDataset_fire_padel_yolo/val/labels


100%|██████████| 3404/3404 [00:29<00:00, 113.92it/s]


In [8]:
def modify_label_numbers(label_folders, data_yaml_path, label_to_modify, new_label):
    """Renumber one class index in YOLO label files and in the dataset YAML.

    Each ``.txt`` file found directly inside the folders of ``label_folders``
    is read; every line whose first field (the class index) equals
    ``label_to_modify`` is rewritten with ``new_label`` as its class index,
    and the file is written back only when at least one line changed. The
    ``names`` mapping of the YAML file at ``data_yaml_path`` is then re-keyed
    from ``label_to_modify`` to ``new_label``.

    Args:
        label_folders: Iterable of directories containing label ``.txt`` files.
        data_yaml_path: Path of the dataset YAML file to update.
        label_to_modify: Integer class index to replace.
        new_label: Integer class index to write instead.

    Raises:
        ValueError: If the first field of a non-blank line is not an integer.
            The offending file is left untouched because parsing happens
            before the file is reopened for writing.

    Note:
        Only a dict-style ``names`` entry (index -> name) is edited; if
        ``new_label`` is already a key there, its name is overwritten.
        Fields of a rewritten line are re-joined with single spaces.
    """
    for folder in label_folders:
        print(folder)
        for filename in tqdm.tqdm(os.listdir(folder)):
            if not filename.endswith(".txt"):
                continue

            filepath = os.path.join(folder, filename)
            with open(filepath, 'r') as f:
                lines = f.readlines()

            # Build the new content BEFORE reopening the file in 'w' mode:
            # opening for writing truncates it, so a parse error raised
            # afterwards would leave the label file partially written.
            updated_lines = []
            changed = False
            for line in lines:
                # split() with no argument accepts spaces and tabs alike and
                # returns an empty list for blank lines, which are kept as is.
                fields = line.split()
                if fields and int(fields[0]) == label_to_modify:
                    updated_lines.append(f"{new_label} {' '.join(fields[1:])}\n")
                    changed = True
                else:
                    updated_lines.append(line)

            # Skip the write entirely when no line carried the old index.
            if changed:
                with open(filepath, 'w') as f:
                    f.writelines(updated_lines)

    # Update data.yaml
    with open(data_yaml_path, 'r') as f:
        data = yaml.safe_load(f)

    if 'names' in data:
        names = data['names']
        if isinstance(names, dict) and label_to_modify in names:
            names[new_label] = names.pop(label_to_modify)

    with open(data_yaml_path, 'w') as f:
        yaml.dump(data, f)
        
        

# Renumber class 83 to 81 in every label folder and in data.yaml.
label_to_modify, new_label = 83, 81
modify_label_numbers(
    label_folders=label_paths,
    data_yaml_path=data_yaml_path,
    label_to_modify=label_to_modify,
    new_label=new_label,
)

mergedCocoDataset_fire_padel_yolo/test/labels


100%|██████████| 1700/1700 [00:12<00:00, 139.85it/s]


mergedCocoDataset_fire_padel_yolo/train/labels


100%|██████████| 11914/11914 [01:30<00:00, 132.06it/s]


mergedCocoDataset_fire_padel_yolo/val/labels


100%|██████████| 3404/3404 [00:25<00:00, 132.50it/s]
