In [1]:
import os
from tqdm import tqdm
from PIL import Image
import shutil
import random

# Relative location of the YOLO output tree; the converters below join it
# onto the dataset directory they are given.
DESTINATION_DIR = '../YOLO'

# Complete label vocabulary: class name -> integer id, where the id is the
# position of the name in the tuple below (order matters).
CLASSES_FULL = dict(
    (label, idx)
    for idx, label in enumerate((
        "aircraft",
        "ant",
        "badminton",
        "bait",
        "balloon",
        "bat",
        "bee",
        "bery",
        "bicycle",
        "bird",
        "boat",
        "bowling",
        "bronze_statue",
        "car",
        "chick",
        "dice",
        "drosophila",
        "eagle",
        "electric_bicycle",
        "fish",
        "fly",
        "golf",
        "hoodle",
        "insect",
        "jet",
        "kite",
        "Kongming_Lantern",
        "ladybird",
        "light",
        "meteor",
        "moon",
        "Paraglider",
        "people",
        "pingpang",
        "poker_box",
        "sheep",
        "shot",
        "shrimp",
        "shuttlecock",
        "skateboard",
        "snake_head",
        "soccer",
        "star",
        "surfing",
        "tennis",
        "ufo",
        "volleyball",
        "yoyo",
    ))
)

# Two-class vocabulary using the same position-as-id scheme.
CLASSES_MIN = {
    "aircraft": 0,
    "bird": 1,
}

# Two-class vocabulary with aliases: both "plane" and "aeroplane" map to id 0.
CLASSES_AUG = dict(
    plane=0,
    aeroplane=0,
    bird=1,
)

# Vocabulary currently used by the conversion and display code.
CLASSES = CLASSES_AUG

# Reverse lookup id -> name. When several names share an id, the one that
# comes last in CLASSES wins (for CLASSES_AUG, id 0 resolves to "aeroplane").
CLASSES_INV = dict((class_id, label) for label, class_id in CLASSES.items())


def convert_file(dataset_dir: str, file: str, obj_class: str, train_split: float):
    """Convert one sequence folder that has a single annotation file into YOLO files.

    Reads ``<dataset_dir>/<file>/<file>.txt`` (one ``x y w h`` box per
    non-blank line, comma- or whitespace-separated, where ``x``/``y`` are the
    box's top-left corner) and pairs the boxes with the files of
    ``<dataset_dir>/<file>/img`` in sorted filename order. For every pair a
    one-line YOLO label ``obj_class cx cy w h`` (normalised by the image
    size) is written and the image is copied next to it.

    Output goes under ``os.path.join(dataset_dir, DESTINATION_DIR)`` in
    ``images/<split>`` and ``labels/<split>``; this is the same tree that
    ``convert_dataset`` creates. The split directories are created here if
    they are missing.

    Args:
        dataset_dir: Directory containing the sequence folder.
        file: Name of the sequence folder (also the annotation file's stem).
        obj_class: Class id written as the first field of every label.
        train_split: Probability of a frame going to ``train``; the remainder
            is divided evenly between ``val`` and ``test``.

    If the number of non-blank annotation lines differs from the number of
    images, a message is printed and nothing is converted.
    """
    full_path = os.path.join(dataset_dir, file)
    fname, img_dir, destination = f"{full_path}/{file}.txt", f"{full_path}/img", f"{full_path}/bbox"
    # Kept for compatibility with earlier runs; nothing below writes into it.
    os.makedirs(destination, exist_ok=True)

    images = sorted(os.listdir(img_dir))

    # Drop blank lines *before* pairing so a stray empty line can neither
    # crash the float parsing nor shift every later box onto the wrong image.
    with open(fname, 'r') as f:
        lines = [stripped for stripped in (raw.strip() for raw in f) if stripped != '']

    if len(lines) != len(images):
        print(f"length of {fname} does not match number of files in {img_dir}")
        return

    out_root = os.path.join(dataset_dir, DESTINATION_DIR)
    for kind in ('images', 'labels'):
        for split_name in ('train', 'val', 'test'):
            os.makedirs(os.path.join(out_root, kind, split_name), exist_ok=True)

    for line, image in zip(lines, images):
        x, y, w, h = [float(val) for val in line.replace(',', ' ').split()]
        x, y, w, h = normalise_bbox(x, y, w, h, os.path.join(img_dir, image))

        # Strip only the real extension and keep it on the copied image.
        stem, ext = os.path.splitext(image)
        new_fname = f"{file}_{stem}"

        # Same draw order as before: one draw for train, a second only when
        # the frame did not land in train.
        if random.random() < train_split:
            split = 'train'
        elif random.random() < 0.5:
            split = 'val'
        else:
            split = 'test'

        with open(os.path.join(out_root, 'labels', split, new_fname + '.txt'), 'w') as f2:
            f2.write(' '.join(str(v) for v in [obj_class, x, y, w, h]))
        shutil.copyfile(os.path.join(img_dir, image), os.path.join(out_root, 'images', split, new_fname + ext))

def normalise_bbox(x, y, w, h, fname):
    """Turn a corner-anchored box into centre/size fractions of its image.

    ``x``/``y`` are offset by half of ``w``/``h`` to obtain the box centre;
    the centre and the size are then divided by the width and height of the
    image stored at ``fname``.

    Returns:
        Tuple ``(centre_x / width, centre_y / height, w / width, h / height)``.
    """
    mid_x = x + (w / 2)
    mid_y = y + (h / 2)

    # Only the dimensions are needed; the image is closed again right away.
    with Image.open(fname) as picture:
        img_w, img_h = picture.size

    return (mid_x / img_w, mid_y / img_h, w / img_w, h / img_h)

def convert_dataset(dataset_dir: str = '.', train_split: float = 0.8):
    """Convert every sequence folder in ``dataset_dir`` into a YOLO train/val tree.

    Each sub-directory ``<d>`` of ``dataset_dir`` is expected to contain
    ``img/`` (images) and ``annotation/`` (one ``<image stem>.txt`` per
    image, each non-blank line being ``cls x y w h``, comma- or
    whitespace-separated). Every image is assigned to ``train`` with
    probability ``train_split`` and to ``val`` otherwise; no ``test`` split
    is produced. The image and its converted label file are written to
    ``os.path.join(dataset_dir, DESTINATION_DIR)/{images,labels}/<split>``
    with ``<d>_`` prefixed to the file name.

    Lines whose class name is a key of ``CLASSES`` get the numeric class id
    and a box normalised by ``normalise_bbox``.

    Args:
        dataset_dir: Directory whose sub-directories are the sequences.
        train_split: Probability of an image being placed in ``train``.

    Raises:
        Any exception hit while converting an image is re-raised after the
        offending label path and destination have been printed.
    """
    out_root = os.path.join(dataset_dir, DESTINATION_DIR)
    for kind in ('images', 'labels'):
        for split_name in ('train', 'val'):
            create_directory_if_not_exists(os.path.join(out_root, kind, split_name))

    directories = [d for d in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, d))]
    for d in directories:
        print(f"DIRECTORY: {d}")
        img_dir = os.path.join(dataset_dir, d, 'img')
        lbl_dir = os.path.join(dataset_dir, d, 'annotation')
        for img_name in tqdm(os.listdir(img_dir)):

            split = 'train' if random.random() < train_split else 'val'
            img_dest = os.path.join(out_root, 'images', split)
            lbl_dest = os.path.join(out_root, 'labels', split)

            img_path = os.path.join(img_dir, img_name)
            # Swap the real extension for .txt; a plain '.png' substitution
            # left other image types pointing at themselves as label file.
            lbl_name = os.path.splitext(img_name)[0] + '.txt'
            lbl_path = os.path.join(lbl_dir, lbl_name)

            new_lines = []
            try:
                with open(lbl_path, 'r') as labels:
                    for label in labels:
                        fields = label.replace(',', ' ').split()
                        if not fields:
                            # Blank line (e.g. trailing newline at end of file).
                            continue
                        cls, x, y, w, h = fields
                        if cls in CLASSES:
                            cls = CLASSES[cls]
                            x, y, w, h = normalise_bbox(float(x), float(y), float(w), float(h), img_path)
                        # NOTE(review): a line whose class is not in CLASSES is
                        # copied through with its original class token and
                        # coordinates -- confirm this pass-through is intended.
                        new_lines.append(' '.join(str(val) for val in [cls, x, y, w, h]))
                with open(os.path.join(lbl_dest, d + "_" + lbl_name), 'w') as f:
                    # One object per line; without the newline, several
                    # objects in one image were fused into a single line.
                    f.writelines(entry + '\n' for entry in new_lines)
                shutil.copyfile(img_path, os.path.join(img_dest, d + "_" + img_name))
            except Exception:
                print(lbl_path)
                print(f"DEST {lbl_dest}")
                raise

        # class_name = d[:find_first_numerical_index(d)]
        # if class_name not in CLASSES:
        #     print(f"no class found for {d} (class name: {class_name})")
        # else:
        #     convert_file(dataset_dir, d, CLASSES[class_name], train_split=train_split)


def create_directory_if_not_exists(directory_path):
    """Create ``directory_path`` (and any missing parents) if it is absent.

    Calling it again for an existing directory is a no-op. Uses
    ``exist_ok=True`` rather than a separate existence check, so there is no
    window between checking and creating in which another process could
    create the directory and make this call fail.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    os.makedirs(directory_path, exist_ok=True)

def find_first_numerical_index(string):
    """Return the index of the first digit character in ``string``.

    Yields ``None`` when no character of ``string`` is a digit.
    """
    digit_positions = (pos for pos, ch in enumerate(string) if ch.isdigit())
    return next(digit_positions, None)


In [2]:
# Run the conversion on the raw donghyun dataset; one "DIRECTORY: ..." line
# and one progress bar are printed per sequence folder.
convert_dataset('datasets/donghyun_dataset_split/raw')
# convert_dataset('datasets/TOT')


DIRECTORY: bird_9_20230619


  0%|          | 0/44 [00:00<?, ?it/s]

100%|██████████| 44/44 [00:00<00:00, 265.48it/s]


DIRECTORY: plane_11_20240125_w1


100%|██████████| 41/41 [00:00<00:00, 287.59it/s]


DIRECTORY: plane_5_20240129(ov1)


100%|██████████| 76/76 [00:00<00:00, 328.18it/s]


DIRECTORY: plane_7_20230620_w1


100%|██████████| 220/220 [00:00<00:00, 282.39it/s]


DIRECTORY: plane_21_20240131(ov1)


100%|██████████| 57/57 [00:00<00:00, 346.26it/s]


DIRECTORY: bird_14_20240131


100%|██████████| 18/18 [00:00<00:00, 343.86it/s]


DIRECTORY: plane_11_20240130(ov1)


100%|██████████| 84/84 [00:00<00:00, 341.75it/s]


DIRECTORY: bird_6_20230613


100%|██████████| 42/42 [00:00<00:00, 207.97it/s]


DIRECTORY: plane_4_20240129(ov1)


100%|██████████| 77/77 [00:00<00:00, 192.45it/s]


DIRECTORY: bird_13_20240131


100%|██████████| 14/14 [00:00<00:00, 342.05it/s]


DIRECTORY: plane_16_20240131(ov1)


100%|██████████| 70/70 [00:00<00:00, 113.12it/s]


DIRECTORY: bird_1_20230531


100%|██████████| 29/29 [00:00<00:00, 129.83it/s]


DIRECTORY: plane_15_20240129_w1


100%|██████████| 97/97 [00:00<00:00, 305.50it/s]


DIRECTORY: bird_11_20240130


100%|██████████| 27/27 [00:00<00:00, 323.57it/s]


DIRECTORY: plane_19_20240131(ov1)


100%|██████████| 40/40 [00:00<00:00, 327.36it/s]


DIRECTORY: bird_3_20230613


100%|██████████| 32/32 [00:00<00:00, 407.73it/s]


DIRECTORY: plane_19_20240129_w1


100%|██████████| 34/34 [00:00<00:00, 321.86it/s]


DIRECTORY: plane_2_20230531(ov1)


100%|██████████| 271/271 [00:01<00:00, 235.14it/s]


DIRECTORY: plane_6_20240129(ov1)


100%|██████████| 60/60 [00:00<00:00, 140.83it/s]


DIRECTORY: bird_2_20230613


100%|██████████| 33/33 [00:00<00:00, 368.83it/s]


DIRECTORY: bird_8_20230619


100%|██████████| 31/31 [00:00<00:00, 249.70it/s]


DIRECTORY: plane_3_20230601_w1


100%|██████████| 68/68 [00:00<00:00, 190.64it/s]


DIRECTORY: plane_1_20230525_w1


100%|██████████| 72/72 [00:00<00:00, 164.99it/s]


DIRECTORY: plane_2_20230530_w1


100%|██████████| 143/143 [00:00<00:00, 143.74it/s]


DIRECTORY: plane_13_20240130(ov1)


100%|██████████| 75/75 [00:00<00:00, 335.05it/s]


DIRECTORY: plane_9_20240125_w1


100%|██████████| 35/35 [00:02<00:00, 14.60it/s] 


DIRECTORY: plane_14_20240130(ov1)


100%|██████████| 66/66 [00:00<00:00, 330.10it/s]


DIRECTORY: plane_20_20240131(ov1)


100%|██████████| 65/65 [00:00<00:00, 345.71it/s]


DIRECTORY: plane_9_20240130(ov1)


100%|██████████| 70/70 [00:00<00:00, 337.77it/s]


DIRECTORY: plane_18_20240129_w1


100%|██████████| 66/66 [00:00<00:00, 331.35it/s]


DIRECTORY: plane_8_20230622_w1


100%|██████████| 106/106 [00:00<00:00, 287.09it/s]


DIRECTORY: plane_14_20240129_w1


100%|██████████| 93/93 [00:00<00:00, 149.14it/s]


DIRECTORY: plane_10_20240130(ov1)


100%|██████████| 59/59 [00:00<00:00, 140.00it/s]


DIRECTORY: plane_16_20240129_w1


100%|██████████| 40/40 [00:00<00:00, 82.61it/s] 


DIRECTORY: plane_18_20240131(ov1)


100%|██████████| 89/89 [00:02<00:00, 36.03it/s] 


DIRECTORY: plane_15_20240131(ov1)


100%|██████████| 98/98 [00:00<00:00, 306.21it/s]


DIRECTORY: plane_12_20240130(ov1)


100%|██████████| 70/70 [00:00<00:00, 330.32it/s]


DIRECTORY: bird_10_20240129


100%|██████████| 27/27 [00:00<00:00, 478.98it/s]


DIRECTORY: plane_17_20240131(ov1)


100%|██████████| 31/31 [00:00<00:00, 299.12it/s]


DIRECTORY: plane_10_20240125_w1


100%|██████████| 118/118 [00:00<00:00, 287.42it/s]


DIRECTORY: plane_7_20240129(ov1)


100%|██████████| 72/72 [00:00<00:00, 133.73it/s]


DIRECTORY: plane_8_20240130(ov1)


100%|██████████| 56/56 [00:00<00:00, 140.80it/s]


DIRECTORY: bird_12_20240130


100%|██████████| 3/3 [00:00<00:00, 20.43it/s]


DIRECTORY: plane_21_20240130_w1


100%|██████████| 20/20 [00:00<00:00, 246.53it/s]


DIRECTORY: plane_6_20230619_w1


100%|██████████| 215/215 [00:03<00:00, 67.03it/s]


DIRECTORY: bird_7_20230619


100%|██████████| 31/31 [00:00<00:00, 300.88it/s]


DIRECTORY: bird_4_20230613


100%|██████████| 46/46 [00:00<00:00, 415.55it/s]


DIRECTORY: plane_20_20240129_w1


100%|██████████| 38/38 [00:00<00:00, 322.47it/s]


DIRECTORY: bird_5_20230613


100%|██████████| 49/49 [00:00<00:00, 387.98it/s]


DIRECTORY: plane_4_20230614_w1


100%|██████████| 39/39 [00:00<00:00, 262.94it/s]


DIRECTORY: plane_5_20230614_w1


100%|██████████| 101/101 [00:00<00:00, 107.82it/s]


DIRECTORY: plane_13_20240129_w1


100%|██████████| 55/55 [00:00<00:00, 101.98it/s]


DIRECTORY: plane_12_20240125_w1


100%|██████████| 103/103 [00:02<00:00, 40.37it/s]


DIRECTORY: plane_3_20230622(ov1)


100%|██████████| 198/198 [00:00<00:00, 232.22it/s]


DIRECTORY: plane_1_20230525(ov1)


100%|██████████| 100/100 [00:00<00:00, 194.29it/s]


DIRECTORY: plane_17_20240129_w1


100%|██████████| 48/48 [00:00<00:00, 85.73it/s] 


In [None]:
import cv2

def display_bbox(file: str, yolo_dir: str = 'datasets/TOT/YOLO', split: str = 'train'):
    """Show a converted image with the first box of its YOLO label drawn on it.

    Loads ``<yolo_dir>/images/<split>/<file>.jpg`` and the first line of
    ``<yolo_dir>/labels/<split>/<file>.txt`` (``class cx cy w h`` with
    normalised coordinates), scales the box back to pixels, draws the box
    plus a small marker at its centre, and opens an OpenCV window that
    blocks until a key is pressed.

    Args:
        file: File stem shared by the image and the label.
        yolo_dir: Root of the YOLO tree; the default is the previously
            hard-coded location.
        split: Sub-directory to read from (default ``'train'``).

    Returns:
        ``(box1, box2)``: the centre marker and the bounding box, each as a
        pair of ``(x, y)`` pixel corners.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If the first label line has fewer than five fields.
    """
    image_path = f'{yolo_dir}/images/{split}/{file}.jpg'
    bbox_path = f'{yolo_dir}/labels/{split}/{file}.txt'

    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"could not read image: {image_path}")

    # Only the first label line is used.
    with open(bbox_path, "r") as label_file:
        bounding_box_info = label_file.readline().strip().split()
    print(f"NORMED BBOX: {bounding_box_info}")
    if len(bounding_box_info) < 5:
        raise ValueError(f"expected 'class cx cy w h' on the first line of {bbox_path}")

    class_label = int(bounding_box_info[0])
    center_x, center_y, width, height = (float(v) for v in bounding_box_info[1:5])

    # Convert normalized coordinates to absolute pixel coordinates.
    h, w = image.shape[:2]
    x1 = int((center_x - width / 2) * w)
    y1 = int((center_y - height / 2) * h)
    x2 = int((center_x + width / 2) * w)
    y2 = int((center_y + height / 2) * h)
    print(f"RESTORED INFO: {center_x * w, center_y * h, width * w, height * h}")
    print(f"WIDTH={w}, HEIGHT={h}")

    # box1 is a 1-pixel rectangle marking the centre; box2 is the actual box.
    box1 = ((int(center_x*w), int(center_y*h)), (int((center_x*w))+1, int((center_y*h))+1))
    box2 = ((x1, y1), (x2, y2))
    cv2.rectangle(image, box1[0], box1[1], (0, 255, 0), 2)
    cv2.rectangle(image, box2[0], box2[1], (0, 255, 0), 2)

    # The label may have been written with a different class table than the
    # one currently active; fall back to the raw id instead of failing.
    print(f'class = {CLASSES_INV.get(class_label, class_label)}')
    cv2.imshow("Image with Bounding Box", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return box1, box2

def display_bbox_reg(obj: str, num: int, box1 = None, box2 = None, raw_dir: str = 'datasets/TOT/raw'):
    """Show a raw frame with its original (un-normalised) annotation drawn on it.

    Loads ``<raw_dir>/<obj>/img/<num as 5 digits>.jpg`` and line ``num``
    (1-based) of ``<raw_dir>/<obj>/<obj>.txt``, read as ``x y w h`` with
    ``x``/``y`` the top-left corner. The box is normalised and scaled back
    to pixels before drawing, so the picture shows what survives that round
    trip; the result of ``normalise_bbox`` is printed alongside for
    comparison. Opens an OpenCV window that blocks until a key is pressed.

    Args:
        obj: Sequence name (folder name and annotation file stem).
        num: 1-based frame number.
        box1: Optional pair of ``(x, y)`` corners drawn in green.
        box2: Optional pair of ``(x, y)`` corners drawn in green.
        raw_dir: Root of the raw dataset; the default is the previously
            hard-coded location.

    Raises:
        ValueError: If ``num`` is smaller than 1.
        FileNotFoundError: If the image cannot be read.
    """
    # num - 1 is used as a list index below; 0 would silently pick the LAST
    # annotation line through negative indexing.
    if num < 1:
        raise ValueError(f"num is 1-based and must be >= 1, got {num}")

    image_path = f'{raw_dir}/{obj}/img/{num:05d}.jpg'
    bbox_path = f'{raw_dir}/{obj}/{obj}.txt'

    # cv2.imread does not raise on a missing/unreadable file; it returns None.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"could not read image: {image_path}")

    with open(bbox_path, "r") as bbox_file:
        line = bbox_file.readlines()[num-1]
    print(num - 1)
    # Accept comma separators as well, matching how convert_file parses
    # the same annotation files.
    bounding_box_info = line.strip().replace(',', ' ').split()
    print(f"REGULAR BBOX {bounding_box_info}")
    x, y, width, height = (float(v) for v in bounding_box_info[:4])

    center_x = x + (width / 2)
    center_y = y + (height / 2)
    print(f"CENTER_X: {center_x}, CENTER_Y: {center_y}")
    h, w = image.shape[:2]

    x_norm, y_norm, w_norm, h_norm = center_x / w, center_y / h, width / w, height / h
    print(f"NORMED INFO: {x_norm, y_norm, w_norm, h_norm}")
    # Cross-check against the helper used during conversion.
    print(normalise_bbox(x, y, width, height, image_path))

    # Normalised -> pixel corners, i.e. what a YOLO consumer would reconstruct.
    x11 = int((x_norm - w_norm / 2) * w)
    y11 = int((y_norm - h_norm / 2) * h)
    x21 = int((x_norm + w_norm / 2) * w)
    y21 = int((y_norm + h_norm / 2) * h)

    # Optional overlays supplied by the caller.
    if box1 is not None:
        cv2.rectangle(image, box1[0], box1[1], (0, 255, 0), 2)
    if box2 is not None:
        cv2.rectangle(image, box2[0], box2[1], (0, 255, 0), 2)

    # 1-pixel centre marker and the round-tripped box.
    cv2.rectangle(image, (int(center_x), int(center_y)), (int(center_x)+1, int(center_y)+1), (255, 0, 0), 2)
    cv2.rectangle(image, (x11, y11), (x21, y21), (255, 0, 0), 2)

    cv2.imshow("Image with Bounding Box", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [8]:
# Visual check: show the stored YOLO label for the converted frame
# 'jet8_00002', then the raw annotation for frame 1 of sequence 'jet8'.
box1, box2 = display_bbox('jet8_00002')
# NOTE(review): box1/box2 are not passed to display_bbox_reg, so its overlay
# arguments keep their None defaults -- confirm that is intended.
display_bbox_reg('jet8', 1)


NORMED BBOX: ['24', '0.5066089613034623', '0.5691847826086956', '0.043441955193482686', '0.05394927536231884']
RESTORED INFO: (248.745, 157.095, 21.33, 14.89)
WIDTH=491, HEIGHT=276
class = jet
0
REGULAR BBOX ['236.36', '151.09', '21.33', '14.89']
CENTER_X: 247.025, CENTER_Y: 158.535
NORMED INFO: (0.5031059063136456, 0.5744021739130435, 0.043441955193482686, 0.05394927536231884)
(0.5031059063136456, 0.5744021739130435, 0.043441955193482686, 0.05394927536231884)


In [41]:
# Scratch check: count the lines of one annotation file twice, once with a
# generator expression and once with a list comprehension; both print the
# same number.
with open('datasets/ToT_extracted/bird22-1/bird22-1.txt', 'r') as f1:
    print(sum(1 for _ in f1))
with open('datasets/ToT_extracted/bird22-1/bird22-1.txt', 'r') as f1:
    print(sum([1 for _ in f1]))

119
119


convert_baseline.ipynb	README.md  VideoMix.py		yolov8n.pt
datasets		runs	   VideoTransform.py
i3d.py			train.py   YOLO_training.ipynb


In [11]:
# Scratch check of the `5d` format spec: the integer is right-aligned in a
# field five characters wide.
print(f'hiya {5:5d}')

hiya     5
