# Setup

In [None]:
import os
import cv2
import torch
import json
import shutil

from PIL import Image

In [None]:
# Mount Google Drive at /content/drive. This only works inside a Colab
# runtime, where the google.colab package is importable.
from google.colab import drive
drive.mount('/content/drive')

# Dataset matching

## Initialize

In [None]:
# Source-dataset lookup: numeric class id -> class name.
# Keys are strings because they are compared against raw tokens read from a
# label file. The position of a name in the tuple below is its id, so "0" is
# "background" and "80" is "toothbrush".
# NOTE(review): the names look like the COCO object categories - confirm.
id_class = {
    str(idx): name
    for idx, name in enumerate((
        "background", "person", "bicycle", "car", "motorcycle",              # 0-4
        "airplane", "bus", "train", "truck", "boat",                         # 5-9
        "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",  # 10-14
        "bird", "cat", "dog", "horse", "sheep",                              # 15-19
        "cow", "elephant", "bear", "zebra", "giraffe",                       # 20-24
        "backpack", "umbrella", "handbag", "tie", "suitcase",                # 25-29
        "frisbee", "skis", "snowboard", "sports ball", "kite",               # 30-34
        "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",  # 35-39
        "bottle", "wine glass", "cup", "fork", "knife",                      # 40-44
        "spoon", "bowl", "banana", "apple", "sandwich",                      # 45-49
        "orange", "broccoli", "carrot", "hot dog", "pizza",                  # 50-54
        "donut", "cake", "chair", "couch", "potted plant",                   # 55-59
        "bed", "dining table", "toilet", "tv", "laptop",                     # 60-64
        "mouse", "remote", "keyboard", "cell phone", "microwave",            # 65-69
        "oven", "toaster", "sink", "refrigerator", "book",                   # 70-74
        "clock", "vase", "scissors", "teddy bear", "hair drier",             # 75-79
        "toothbrush",                                                        # 80
    ))
}

In [None]:
# Target-dataset lookup: class name -> integer class id.
# Ids are simply the position of the name in the concatenated tuples below
# (0..97). The first tuple holds ids 0-20; the second holds ids 21-97, which
# are the id_class names that do not already appear in the first tuple.
class_id = {
    name: idx
    for idx, name in enumerate(
        (
            "ambulance", "army vehicle", "auto rickshaw", "bicycle", "bus",      # 0-4
            "car", "garbagevan", "human hauler", "minibus", "minivan",           # 5-9
            "motorbike", "pickup", "policecar", "rickshaw", "scooter",           # 10-14
            "suv", "taxi", "three wheelers -CNG-", "truck", "van",               # 15-19
            "wheelbarrow",                                                       # 20
        )
        + (
            "background", "person", "airplane", "motorcycle",                    # 21-24
            "train", "boat", "traffic light", "fire hydrant", "stop sign",       # 25-29
            "parking meter", "bench", "bird", "cat", "dog",                      # 30-34
            "horse", "sheep", "cow", "elephant", "bear",                         # 35-39
            "zebra", "giraffe", "backpack", "umbrella", "handbag",               # 40-44
            "tie", "suitcase", "frisbee", "skis", "snowboard",                   # 45-49
            "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",  # 50-54
            "surfboard", "tennis racket", "bottle", "wine glass", "cup",         # 55-59
            "fork", "knife", "spoon", "bowl", "banana",                          # 60-64
            "apple", "sandwich", "orange", "broccoli", "carrot",                 # 65-69
            "hot dog", "pizza", "donut", "cake", "chair",                        # 70-74
            "couch", "potted plant", "bed", "dining table", "toilet",            # 75-79
            "tv", "laptop", "mouse", "remote", "keyboard",                       # 80-84
            "cell phone", "microwave", "oven", "toaster", "sink",                # 85-89
            "refrigerator", "book", "clock", "vase", "scissors",                 # 90-94
            "teddy bear", "hair drier", "toothbrush",                            # 95-97
        )
    )
}

## Functions

In [None]:
def rename_label(input_file, output_file, id_to_name=None, name_to_id=None):
  """Rewrite a label file, translating the class id at the start of each line.

  For every line, the first whitespace-separated token is looked up in
  ``id_to_name`` to get a class name, and that name is looked up in
  ``name_to_id`` to get the replacement id. All remaining tokens are copied
  through unchanged, so a coordinate that happens to be written as ``0`` or
  ``1`` is never mistaken for a class id. Tokens are re-joined with single
  spaces and each output line ends with a newline.

  Args:
    input_file: Path of the label file to read.
    output_file: Path of the label file to write. May be the same path as
      ``input_file``; the input is read completely before the output is
      opened.
    id_to_name: Mapping from source id (as a string token) to class name.
      Defaults to the notebook-level ``id_class``.
    name_to_id: Mapping from class name to target id. Defaults to the
      notebook-level ``class_id``.

  A line whose first token has no translation (unknown id, or a name that is
  absent from ``name_to_id``) keeps its original first token instead of
  being written with the text ``None``.
  """
  # Resolve the defaults at call time so the notebook globals are only
  # required when the caller does not supply its own tables.
  if id_to_name is None:
    id_to_name = id_class
  if name_to_id is None:
    name_to_id = class_id

  # Read everything before opening the output: mode "w" truncates the file
  # immediately, which would erase the data when both paths are the same.
  with open(input_file, "r") as infile:
    lines = infile.readlines()

  converted = []
  for line in lines:
    words = line.strip().split()
    if words:
      new_id = name_to_id.get(id_to_name.get(words[0]))
      if new_id is not None:
        words[0] = str(new_id)
    # A blank input line has no tokens and is written back as a blank line.
    converted.append(" ".join(words) + "\n")

  with open(output_file, "w") as outfile:
    outfile.writelines(converted)

In [None]:
def remove_label(input_file, output_file, nc=21):
  """Copy only the label lines whose class id is below ``nc``.

  The first whitespace-separated token of each line is parsed as an integer
  class id. Lines with an id strictly less than ``nc`` are written to
  ``output_file`` exactly as they were read; all other lines are dropped.
  Blank lines are dropped as well instead of raising ``IndexError``.

  Args:
    input_file: Path of the label file to read.
    output_file: Path of the filtered label file to write. May be the same
      path as ``input_file``; the input is read completely before the
      output is opened.
    nc: Exclusive upper bound on the class ids to keep (default 21).

  Raises:
    ValueError: If the first token of a non-blank line is not an integer.
      This is raised before the output file is opened.
  """
  # Read first: opening the output in "w" mode truncates it, which would
  # erase the data when both paths point at the same file.
  with open(input_file, "r") as infile:
    lines = infile.readlines()

  kept = []
  for line in lines:
    # Split the line into its whitespace-separated parts.
    words = line.split()
    if not words:
      continue
    # The first part is the class id; keep the line untouched if it is low enough.
    if int(words[0]) < nc:
      kept.append(line)

  with open(output_file, "w") as outfile:
    outfile.writelines(kept)

## Start

In [None]:
# Base path that input_dir and output_dir are built from.
# Left blank here as a placeholder.
ROOT = ""

In [None]:
# Source and destination folders for the label-id rewrite loop.
# NOTE(review): both suffixes are empty strings, so both names currently
# evaluate to ROOT itself; presumably the sub-folder names are meant to be
# filled in before running - confirm.
input_dir = ROOT + ""
output_dir = ROOT + ""

In [None]:
# Run rename_label over every file found under input_dir and write the result
# under output_dir, keeping the same relative sub-folder layout.
for root, _, files in os.walk(input_dir):
    # os.walk descends into sub-folders, so each file lives under `root`
    # (the folder currently being visited), not necessarily directly under
    # input_dir. Mirror that relative location on the output side.
    target_dir = os.path.normpath(
        os.path.join(output_dir, os.path.relpath(root, input_dir))
    )
    os.makedirs(target_dir, exist_ok=True)
    for file in files:
        rename_label(os.path.join(root, file), os.path.join(target_dir, file))

# Dataset comparison

In [None]:
# Folder of label .txt files and folder of .jpg images to cross-check.
# Both are blank placeholders here.
labels_root = ""
images_root = ""

In [None]:
def compare_files(text_folder, image_folder):
    """Print which images lack a label file and which label files lack an image.

    Files are paired by base name (file name without extension). Only names
    ending in ``.txt`` are considered in ``text_folder`` and only names ending
    in ``.jpg`` in ``image_folder``; the match is case-sensitive.

    Args:
        text_folder: Directory containing the ``.txt`` label files.
        image_folder: Directory containing the ``.jpg`` images.

    Returns:
        None. The report is printed to standard output, with the unmatched
        names listed in sorted order so repeated runs give identical output.
    """
    def base_names(folder, extension):
        # Base names of the entries in `folder` that end with `extension`.
        return {
            os.path.splitext(entry)[0]
            for entry in os.listdir(folder)
            if entry.endswith(extension)
        }

    label_names = base_names(text_folder, '.txt')
    image_names = base_names(image_folder, '.jpg')

    # (unmatched base names, heading, extension to print, all-clear message)
    reports = (
        (
            image_names - label_names,
            "Missing text files for the following images:",
            ".jpg",
            "No missing text files for images.",
        ),
        (
            label_names - image_names,
            "Missing images for the following text files:",
            ".txt",
            "No missing images for text files.",
        ),
    )

    for unmatched, heading, extension, all_clear in reports:
        if not unmatched:
            print(all_clear)
            continue
        print(heading)
        # Sets have no stable iteration order; sort for reproducible output.
        for name in sorted(unmatched):
            print(f"{name}{extension}")

In [None]:
# Report label files and images in the two folders that have no counterpart.
compare_files(text_folder=labels_root, image_folder=images_root)

# Dataset filtering

In [None]:
# Convert every .txt label file in labels_root into a box file of the same
# name under output_root, using the size of the matching .jpg image.
# Label files without a matching image are skipped and listed at the end.
# NOTE(review): yolo_to_f_rcnn and output_root are not defined in the part of
# the notebook visible here; they must exist before this cell runs - confirm.
missing_array = []

# The destination folder is the same for every file, so create it once.
os.makedirs(output_root, exist_ok=True)

for file in os.listdir(labels_root):
    if not file.endswith('.txt'):
        continue

    label_path = os.path.join(labels_root, file)

    # Swap only the extension. str.replace('.txt', '.jpg') would also rewrite
    # any other '.txt' that occurs earlier in the file name.
    image_file = os.path.splitext(file)[0] + '.jpg'
    image_path = os.path.join(images_root, image_file)

    if not os.path.exists(image_path):
        missing_array.append(image_file)
        print(f"Skipping missing image: {image_path}")
        continue

    # Only the pixel dimensions are needed; the image is closed right away.
    with Image.open(image_path) as img:
        img_w, img_h = img.size

    output_lines = []
    with open(label_path, 'r') as f:
        for line in f:
            parts = line.strip().split()
            # Expect exactly one id followed by four numbers; skip anything else.
            if len(parts) != 5:
                continue
            # Deliberately not named `class_id`: at notebook top level that
            # assignment would replace the `class_id` lookup dict that
            # rename_label reads, breaking it on any later run.
            label_id = int(parts[0])
            bbox = list(map(float, parts[1:]))
            xmin, ymin, xmax, ymax = yolo_to_f_rcnn(bbox, img_w, img_h)
            output_lines.append(f"{label_id} {xmin} {ymin} {xmax} {ymax}")

    output_file = os.path.join(output_root, file)

    # Lines are joined with newlines; no trailing newline is written.
    with open(output_file, 'w') as out_f:
        out_f.write('\n'.join(output_lines))

    print(f"Converted: {label_path} → {output_file}")

if missing_array:
    print("\nMissing images for the following label files:")
    for missing in missing_array:
        print(missing)
else:
    print("\nAll label files have matching images.")