In [None]:
# categorize images into colour and infrared folders
import os
from os.path import join as pjoin, basename
import sys
from shutil import move
from glob import glob

import supervision as sv
import cv2

from colour_vs_infrared import is_infrared
from constants import PROJECT_ROOT, DATA_DIR

# Root of the training split that holds the still-unsorted image/label pairs.
wfs_data_root = os.path.join(DATA_DIR, "obj_Train_data")

from tqdm import tqdm

# One destination folder per category, created up front.
category_dirs = {
    category: os.path.join(wfs_data_root, category)
    for category in ("colour", "infrared")
}
for category_dir in category_dirs.values():
    os.makedirs(category_dir, exist_ok=True)

# Sort every top-level .jpg (and its same-named .txt label, if any) into
# colour/ or infrared/ according to is_infrared().
for image_path in tqdm(glob(os.path.join(wfs_data_root, "*.jpg")), desc="Processing images", unit="image"):
    # Swap only the extension: str.replace(".jpg", ".txt") would also rewrite
    # any ".jpg" that happens to appear earlier in the path.
    label_path = os.path.splitext(image_path)[0] + ".txt"
    if not os.path.exists(image_path):
        print(f"Image {image_path} does not exist.")
        continue

    target_dir = category_dirs["infrared" if is_infrared(image_path) else "colour"]

    # Check for the label before moving anything, so a missing label no longer
    # aborts the whole run with the image already moved.
    has_label = os.path.exists(label_path)
    move(image_path, os.path.join(target_dir, basename(image_path)))
    if has_label:
        move(label_path, os.path.join(target_dir, basename(label_path)))
    else:
        # tqdm.write keeps the progress bar intact while reporting the gap.
        tqdm.write(f"Label {label_path} does not exist; moved the image only.")

Processing images: 100%|██████████| 4896/4896 [01:59<00:00, 40.80image/s]


In [5]:
# categorize images by their labels
# if labels have any item id higher than 3, move to other folder
import os
from os import walk
from os.path import isfile, join as pjoin
from shutil import move

from tqdm import tqdm

from constants import PROJECT_ROOT, DATA_DIR

# Labels whose class ids are all <= this value stay in the normal folder;
# any higher id sends the image/label pair to the matching "*_other" folder.
MAX_PRIMARY_CLASS_ID = 3

# get label paths
# NOTE(review): only obj_Train_data is walked, so the obj_Test_data branch
# below is reached only if the path itself contains that name - confirm
# whether the test split should be walked as well.
label_paths = []
for root, dirs, files in os.walk(os.path.join(DATA_DIR, "obj_Train_data")):
    for file in files:
        if file.endswith(".txt"):
            label_paths.append(os.path.join(root, file))

print(f"Found {len(label_paths)} label files.")
print("Categorizing labels...")

# create folders
os.makedirs(os.path.join(DATA_DIR, "obj_Test_data", "colour_other"), exist_ok=True)
os.makedirs(os.path.join(DATA_DIR, "obj_Test_data", "infrared_other"), exist_ok=True)
os.makedirs(os.path.join(DATA_DIR, "obj_Train_data", "colour_other"), exist_ok=True)
os.makedirs(os.path.join(DATA_DIR, "obj_Train_data", "infrared_other"), exist_ok=True)

# categorize labels
for label_path in tqdm(label_paths, desc="Processing labels", unit="label"):
    # Swap only the extension; str.replace would touch any ".txt" in the path.
    image_path = os.path.splitext(label_path)[0] + ".jpg"
    if not os.path.exists(image_path):
        # A .txt without a matching image cannot be moved as a pair; skip it
        # instead of moving the label and then crashing on the image.
        tqdm.write(f"Image {image_path} does not exist; skipping {label_path}.")
        continue

    with open(label_path, "r") as f:
        lines = f.readlines()

    # check if any item id is higher than MAX_PRIMARY_CLASS_ID
    # (blank lines are ignored; they used to raise IndexError)
    has_other_class = any(
        int(line.split()[0]) > MAX_PRIMARY_CLASS_ID for line in lines if line.strip()
    )

    # Keep the file in the modality it already sits in: anything under
    # infrared/ or infrared_other/ stays infrared, everything else is colour.
    # Previously every file was sent to colour/ or colour_other/.
    parent_name = os.path.basename(os.path.dirname(label_path))
    modality = "infrared" if parent_name.startswith("infrared") else "colour"
    split_name = "obj_Test_data" if "obj_Test_data" in label_path else "obj_Train_data"
    folder_name = modality + "_other" if has_other_class else modality

    target_dir = os.path.join(DATA_DIR, split_name, folder_name)
    # The plain colour/infrared folders are not created above, so make sure
    # the destination exists before moving into it.
    os.makedirs(target_dir, exist_ok=True)
    move(label_path, os.path.join(target_dir, os.path.basename(label_path)))
    move(image_path, os.path.join(target_dir, os.path.basename(image_path)))

Found 8467 label files.
Categorizing labels...


Processing labels: 100%|██████████| 8467/8467 [00:02<00:00, 3447.90label/s]
