In [14]:
import tensorflow as tf
import random
from matplotlib.pyplot import imshow
import math
from tqdm import tqdm

In [12]:
# Parse the annotation file into {filename: [(xmin, ymin, xmax, ymax, class_id), ...]}.
# Each line holds a filename followed by groups of five integers, one group per object.
object_details = {}

with open("./data/pascal_voc_training_data.txt", "r") as fr:
    for raw_line in fr:
        tokens = raw_line.strip().split()
        filename = tokens[0]

        boxes = object_details[filename] = []
        boxes.extend(
            tuple(int(token) for token in tokens[start:start + 5])
            for start in range(1, len(tokens), 5)
        )

        # Report images that carry no annotated object at all.
        if not boxes:
            print(filename)

object_details

{'000005.jpg': [(263, 211, 324, 339, 8),
  (165, 264, 253, 372, 8),
  (5, 244, 67, 374, 8),
  (241, 194, 295, 299, 8),
  (277, 186, 312, 220, 8)],
 '000007.jpg': [(141, 50, 500, 330, 6)],
 '000009.jpg': [(69, 172, 270, 330, 12),
  (150, 141, 229, 284, 14),
  (285, 201, 327, 331, 14),
  (258, 198, 297, 329, 14)],
 '000012.jpg': [(156, 97, 351, 270, 6)],
 '000016.jpg': [(92, 72, 305, 473, 1)],
 '000017.jpg': [(185, 62, 279, 199, 14), (90, 78, 403, 336, 12)],
 '000019.jpg': [(231, 88, 483, 256, 7), (11, 113, 266, 259, 7)],
 '000020.jpg': [(33, 148, 371, 416, 6)],
 '000021.jpg': [(1, 235, 182, 388, 11),
  (210, 36, 336, 482, 14),
  (46, 82, 170, 365, 14),
  (11, 181, 142, 419, 14)],
 '000023.jpg': [(9, 230, 245, 500, 1),
  (230, 220, 334, 500, 1),
  (2, 178, 90, 500, 1),
  (2, 1, 117, 369, 14),
  (3, 2, 243, 462, 14),
  (225, 1, 334, 486, 14)],
 '000024.jpg': [(196, 165, 489, 247, 18)],
 '000026.jpg': [(90, 125, 337, 212, 6)],
 '000030.jpg': [(36, 205, 180, 289, 1),
  (51, 160, 150, 292, 1

### Data augmentation using existing code

In [13]:
from data_aug.data_aug import RandomHorizontalFlip, RandomRotate, RandomScale, RandomShear, RandomTranslate, Sequence, RandomHSV
from data_aug.bbox_util import draw_rect
import numpy as np 
import cv2 
import matplotlib.pyplot as plt 
import pickle as pkl
%matplotlib inline

In [15]:
# For every source image, try up to 20 candidates (attempt 0 is the untouched
# image, the rest are random augmentations) and keep the first two whose boxes
# stay inside the original image bounds and remain well-formed.
with open("./data/data_augmentation_pascal_voc_trainig_data.txt", "w") as fw:
    for filename in tqdm(object_details.keys()):
        # The channel axis is reversed after loading and reversed back before saving.
        img = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename))[:,:,::-1]
        bboxes = np.array(object_details[filename], dtype=np.float32)
        h, w, _ = img.shape

        count = 0  # number of candidates accepted for this image so far

        for idx in range(20):
            seq = Sequence([RandomHSV(40, 40, 30), RandomRotate(10), RandomHorizontalFlip(), RandomScale(), RandomShear()])

            if idx != 0:
                img_, bboxes_ = seq(img.copy(), bboxes.copy())
            else:
                img_, bboxes_ = img.copy(), bboxes.copy()
            bboxes_ = bboxes_.astype(int)

            # Nothing to annotate: try the next candidate.
            if bboxes_.shape[0] <= 0:
                continue

            x_min, y_min, x_max, y_max = (bboxes_[:, col] for col in range(4))
            box_is_valid = (
                (x_min >= 0) & (y_min >= 0)
                & (x_max <= w) & (y_max <= h)
                & (x_min < x_max) & (y_min < y_max)
            )
            if not np.all(box_is_valid):
                continue

            out_name = "{}_{}".format(idx, filename)
            cv2.imwrite('./data/VOC_Augmentation_train/{}'.format(out_name), img_[:, :, ::-1])

            fw.write(out_name)
            for xmin, ymin, xmax, ymax, class_num in bboxes_.tolist():
                fw.write(" {} {} {} {} {}".format(xmin, ymin, xmax, ymax, class_num))
            fw.write("\n")

            count += 1
            if count >= 2:
                break

100%|██████████| 4974/4974 [01:35<00:00, 52.14it/s]


### Another version of data augmentation

In [16]:
import albumentations as A
from matplotlib.pyplot import imshow
import cv2
import numpy as np
import random
import os
from tqdm import tqdm

In [17]:
def resize_with_bboxes(target_size, image, bboxes):
    """Resize an image and its pascal_voc boxes with albumentations.

    Args:
        target_size: ``(height, width)`` of the output image.
        image: source image array; a copy is passed to the transform.
        bboxes: boxes in ``pascal_voc`` format; a copy is passed to the transform.

    Returns:
        ``(resized_image, resized_bboxes)``: the image cast to ``np.uint8`` and
        the ``"bboxes"`` entry returned by the albumentations pipeline
        (configured with ``min_visibility=0.2``).
    """
    out_height, out_width = target_size[0], target_size[1]
    resizer = A.Compose(
        [A.Resize(out_height, out_width, interpolation=cv2.INTER_LINEAR)],
        bbox_params=A.BboxParams(format='pascal_voc', min_visibility=0.2),
    )

    outcome = resizer(image=image.copy(), bboxes=bboxes.copy())

    return outcome["image"].astype(np.uint8), outcome["bboxes"]

In [18]:
def mosaic_augmentation(images, bboxes, size=(448, 448)):
    """Tile four images into one mosaic and merge their bounding boxes.

    The canvas is split at a random point ``(x, y)`` into four tiles
    (top-left, top-right, bottom-left, bottom-right). Each input image is
    resized to its tile with ``resize_with_bboxes`` and its boxes are shifted
    by the tile's offset.

    Args:
        images: exactly four image arrays.
        bboxes: four box collections, one per image; each box is
            ``(xmin, ymin, xmax, ymax, class_id)``.
        size: ``(height, width)`` of the mosaic.

    Returns:
        ``(mosaic_image, mosaic_bboxes)``: the ``np.uint8`` mosaic and an array
        holding one 5-element row per surviving box.
    """
    assert len(images) == 4

    h, w = size

    images = [image.copy() for image in images]
    bboxes = [bb.copy() for bb in bboxes]

    # x is the column of the vertical seam, so it must be drawn from the WIDTH;
    # y is the row of the horizontal seam, so it must be drawn from the HEIGHT.
    # (These were swapped, which only worked for square canvases.)
    x = random.randint(int(w * 0.33), int(w * 0.67))
    y = random.randint(int(h * 0.33), int(h * 0.67))

    # Tile shapes are (rows, cols), in the order TL, TR, BL, BR.
    tile_shapes = [(y, x), (y, w - x), (h - y, x), (h - y, w - x)]
    for idx, tile_shape in enumerate(tile_shapes):
        images[idx], bboxes[idx] = resize_with_bboxes(tile_shape, images[idx], bboxes[idx])

    # Fill the result image
    result_image = np.zeros((h, w, images[0].shape[-1]))

    result_image[:y, :x] = images[0]
    result_image[:y, x:] = images[1]
    result_image[y:, :x] = images[2]
    result_image[y:, x:] = images[3]

    # Shift each tile's boxes by the tile origin; the class id column is untouched.
    tile_origins = [(0, 0), (x, 0), (0, y), (x, y)]
    result_bboxes = []

    for tile_bboxes, (dx, dy) in zip(bboxes, tile_origins):
        shift = np.array([dx, dy, dx, dy, 0])
        for bbox in tile_bboxes:
            result_bboxes.append(np.asarray(bbox) + shift)

    return result_image.astype(np.uint8), np.array(result_bboxes)

In [19]:
def mix_up_augmentation(images, bboxes, target_size=(448, 448), alpha=1.5):
    """Blend two images with a Beta-distributed weight and pool their boxes.

    Both images are resized to ``target_size`` with ``resize_with_bboxes`` and
    mixed as ``image1 * lamb + image2 * (1 - lamb)``, where ``lamb`` is drawn
    from ``Beta(alpha, alpha)``.

    Args:
        images: exactly two image arrays.
        bboxes: two box collections, one per image.
        target_size: ``(height, width)`` both images are resized to.
        alpha: shape parameter used for both arguments of the Beta draw.

    Returns:
        ``(mixed_image, mixed_bboxes)``: the ``np.uint8`` blend and the boxes of
        both images stacked along axis 0. When neither image has a surviving
        box, an empty ``(0, 5)`` array is returned.
    """
    assert len(images) == 2

    lamb = random.betavariate(alpha, alpha)

    image1, image2 = [im.copy() for im in images]
    bboxes1, bboxes2 = [bb.copy() for bb in bboxes]

    image1, bboxes1 = resize_with_bboxes(target_size, image1, bboxes1)
    image2, bboxes2 = resize_with_bboxes(target_size, image2, bboxes2)

    result_image = image1 * lamb + image2 * (1 - lamb)

    # An empty box list becomes a 1-D array and cannot be concatenated with a
    # 2-D (n, 5) array, so empty collections are dropped before stacking.
    non_empty = [np.asarray(bb) for bb in (bboxes1, bboxes2) if len(bb) > 0]
    if non_empty:
        result_bboxes = np.concatenate(non_empty, axis=0)
    else:
        result_bboxes = np.zeros((0, 5))

    return result_image.astype(np.uint8), result_bboxes

In [20]:
def random_transform(image, bboxes):
    """Apply a random albumentations pipeline to an image and its boxes.

    Every stage below is configured with ``p=0.5``. Boxes are passed in
    ``pascal_voc`` format and the pipeline is configured with
    ``min_visibility=0.2``.

    Args:
        image: source image array; a copy is transformed.
        bboxes: boxes belonging to ``image``; a copy is transformed.

    Returns:
        ``(transformed_image, transformed_bboxes)``: the image cast to
        ``np.uint8`` and the ``"bboxes"`` entry returned by the pipeline.
    """
    stages = [
        A.BBoxSafeRandomCrop(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.RandomScale(p=0.5),
        A.ShiftScaleRotate(rotate_limit=(-15, 15), p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.RGBShift(r_shift_limit=30, g_shift_limit=30, b_shift_limit=30, p=0.5),
        A.HueSaturationValue(p=0.5),
        A.Affine(p=0.5),
        A.Equalize(p=0.5),
    ]
    box_settings = A.BboxParams(format='pascal_voc', min_visibility=0.2)
    pipeline = A.Compose(stages, bbox_params=box_settings)

    # Work on copies so the caller's arrays are never modified.
    outcome = pipeline(image=image.copy(), bboxes=bboxes.copy())

    return outcome["image"].astype(np.uint8), outcome["bboxes"]

In [22]:
# Build an augmented dataset from every annotated image: random mix-ups,
# random mosaics, one random transformation per image and an untouched copy.
# NOTE(review): the output name keeps the existing "trainig" spelling so the
# file location does not change — confirm before renaming.
with open("./data/data_augmentation_pascal_voc_trainig_data.txt", "w") as fw:
    def _add_new_image(image, bboxes, result_filename):
        """Save one augmented image and append its annotation line.

        Nothing is written when ``bboxes`` has no rows. The channel axis is
        reversed again before saving, undoing the reversal applied on load.
        """
        if bboxes.shape[0] <= 0:
            return

        cv2.imwrite('./data/VOC_Augmentation_train/{}'.format(result_filename), image[:, :, ::-1])

        fw.write("{}".format(result_filename))

        for xmin, ymin, xmax, ymax, class_num in bboxes.astype(int).tolist():
            fw.write(" {} {} {} {} {}".format(xmin, ymin, xmax, ymax, class_num))

        fw.write("\n")

    filenames = list(object_details.keys())
    num_filenames = len(filenames)
    num_mix_up = int(num_filenames * 0.3)

    # Random mix-up
    for idx in tqdm(range(num_mix_up), total=num_mix_up, desc="Mix up"):
        idx1 = random.randint(0, num_filenames - 1)
        idx2 = random.randint(0, num_filenames - 1)

        image1 = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filenames[idx1]))[:,:,::-1]
        image2 = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filenames[idx2]))[:,:,::-1]
        bboxes1 = np.array(object_details[filenames[idx1]], dtype=np.float32)
        bboxes2 = np.array(object_details[filenames[idx2]], dtype=np.float32)

        mix_up_image, mix_up_bboxes = mix_up_augmentation([image1, image2], [bboxes1, bboxes2])

        clear_filename1 = filenames[idx1].split(".")[0]
        clear_filename2 = filenames[idx2].split(".")[0]

        # The iteration index keeps the name unique: the same random pair can be
        # drawn twice, which would overwrite the image and duplicate its entry.
        _add_new_image(mix_up_image, mix_up_bboxes, "mix_up_{}_{}_{}.jpg".format(idx, clear_filename1, clear_filename2))

    # Random mosaic
    for idx in tqdm(range(num_filenames), total=num_filenames, desc="Mosaic"):
        indices = [random.randint(0, num_filenames - 1) for _ in range(4)]
        source_names = [filenames[source_idx] for source_idx in indices]

        images = [cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(name))[:,:,::-1] for name in source_names]
        bboxes = [np.array(object_details[name], dtype=np.float32) for name in source_names]

        mosaic_image, mosaic_bboxes = mosaic_augmentation(images, bboxes)

        # Same uniqueness concern as for mix-up: prefix the iteration index.
        _add_new_image(mosaic_image, mosaic_bboxes, "mosaic_{}_{}.jpg".format(idx, '_'.join([name.split(".")[0] for name in source_names])))

    # Random transformation
    for filename in tqdm(filenames):
        image = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename))[:,:,::-1]
        bboxes = np.array(object_details[filename], dtype=np.float32)

        transformed_image, transformed_bboxes = random_transform(image, bboxes)

        _add_new_image(transformed_image, np.array(transformed_bboxes), "rt_{}".format(filename))

    # Identity
    for filename in tqdm(filenames):
        image = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename))[:,:,::-1]
        bboxes = np.array(object_details[filename], dtype=np.float32)

        _add_new_image(image, bboxes, "same_{}".format(filename))

Mix up: 100%|██████████| 1492/1492 [00:15<00:00, 98.07it/s] 
Mosaic: 100%|██████████| 4974/4974 [01:01<00:00, 80.85it/s]
100%|██████████| 4974/4974 [00:25<00:00, 195.86it/s]
100%|██████████| 4974/4974 [00:09<00:00, 510.08it/s]


### Dealing with the class imbalance problem

In [23]:
# Class names; a class id in the annotation files is an index into this list.
classes_name = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

In [24]:
DATA_PATH = './data/pascal_voc_training_data.txt'

# List of (filename, [class_id, ...]) pairs, one per annotation line.
filename_to_class_ids = []

# `with` guarantees the handle is closed even if a line fails to parse.
with open(DATA_PATH, "r") as training_data_file:
    for line in training_data_file:
        fields = line.strip().split(" ")
        filename = fields[0]

        # Fields are "<filename> (xmin ymin xmax ymax class_id)*", so the class
        # ids sit at positions 5, 10, 15, ...
        class_ids = [int(float(token)) for token in fields[5::5]]

        filename_to_class_ids.append((filename, class_ids))

# Images with the most objects come first; ties are broken by filename (descending).
filename_to_class_ids.sort(key=lambda item: (len(item[1]), item[0]), reverse=True)

In [25]:
# Running number of selected objects per class.
balance_class_counts = [0 for _ in range(len(classes_name))]

def all_larger_than(target: int=5000):
    """Return True when every count in balance_class_counts is strictly above target."""
    return all(count > target for count in balance_class_counts)

used_filenames = []   # selected files, in selection order (repeats allowed)
repeat_count = {}     # filename -> number of times it was selected
current_idx = 0
number_of_files = len(filename_to_class_ids)
target = 2000
steps_without_progress = 0

# Cycle through the files round-robin; a file is selected only while none of
# its classes has already exceeded the target.
while not all_larger_than(target):
    current_filename, class_ids = filename_to_class_ids[current_idx]

    if all(balance_class_counts[cls_id] <= target for cls_id in class_ids):
        used_filenames.append(current_filename)

        for cls_id in class_ids:
            balance_class_counts[cls_id] += 1

        repeat_count[current_filename] = repeat_count.get(current_filename, 0) + 1

    # A step makes progress only if it actually raised a class count.
    if class_ids and used_filenames and used_filenames[-1] is current_filename:
        steps_without_progress = 0
    else:
        steps_without_progress += 1

    # A full pass that changed no count means every later pass is identical:
    # stop with an error instead of looping forever.
    if steps_without_progress >= number_of_files:
        raise RuntimeError(
            "Class balancing stalled: no remaining file can raise the "
            "under-represented classes above target={}".format(target)
        )

    current_idx += 1
    current_idx %= number_of_files

In [26]:
# Report the number of selected objects per class and the size of the selection.
for class_label, class_total in zip(classes_name, balance_class_counts):
    print(f"{class_label}: {class_total}")

total_images = len(used_filenames)
print(f"Total number of images: {total_images}")

aeroplane: 2001
bicycle: 2001
bird: 2001
boat: 2002
bottle: 2007
bus: 2001
car: 2001
cat: 2001
chair: 2002
cow: 2001
diningtable: 2001
dog: 2001
horse: 2001
motorbike: 2001
person: 2001
pottedplant: 2001
sheep: 2007
sofa: 2001
train: 2003
tvmonitor: 2001
Total number of images: 21314


In [27]:
# Sanity check: print one "+1" per occurrence of 000005.jpg in the selection.
for _ in range(used_filenames.count("000005.jpg")):
    print("+1")

+1
+1


In [28]:
# Number of times 000005.jpg was selected by the balancing loop
# (repeat_count is incremented once per selection).
repeat_count["000005.jpg"]

2

In [30]:
# Export the class-balanced selection (`used_filenames`) as an augmented dataset.
# `repeat_count` is reset here and reused to track how many times each file has
# already been written during this export.
repeat_count = {}

with open("./data/data_augmentation_pascal_voc_training_data.txt", "w") as fw:
    def _add_new_image(image, bboxes, result_filename):
        """Save one image and append its annotation line to `fw`.

        Nothing is written when `bboxes` has no rows. The channel axis is
        reversed again before saving, undoing the reversal applied on load.
        """
        if bboxes.shape[0] <= 0:
            return

        cv2.imwrite('./data/VOC_Augmentation_train/{}'.format(result_filename), image[:, :, ::-1])

        fw.write("{}".format(result_filename))

        # One "xmin ymin xmax ymax class" group per box, truncated to integers.
        for xmin, ymin, xmax, ymax, class_num in bboxes.astype(int).tolist():
            fw.write(" {} {} {} {} {}".format(xmin, ymin, xmax, ymax, class_num))
        
        fw.write("\n")
    
    # Sorting groups repeated filenames together: the first occurrence is saved
    # unchanged ("same_..."), every later one as a random transformation
    # ("rt_<n>_...").
    for filename in tqdm(sorted(used_filenames)):
        if repeat_count.get(filename, None) is not None:
            # Do the random transformation
            image = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename))[:,:,::-1]
            bboxes = np.array(object_details[filename], dtype=np.float32)

            transformed_image, transformed_bboxes = random_transform(image, bboxes)

            _add_new_image(transformed_image, np.array(transformed_bboxes), "rt_{}_{}".format(repeat_count[filename], filename))

            repeat_count[filename] = repeat_count[filename] + 1
        else:
            # First occurrence: keep the image as it is
            image = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename))[:,:,::-1]
            bboxes = np.array(object_details[filename], dtype=np.float32)

            _add_new_image(image, bboxes, "same_{}".format(filename))

            repeat_count[filename] = 1

    # Mix-up and mosaic draw from the balanced list, repeats included.
    filenames = used_filenames
    num_filenames = filenames.__len__()

    # Random mix-up
    for idx in tqdm(range(int(num_filenames * 0.3)), total=int(num_filenames * 0.3), desc="Mix up"):
        idx1 = random.randint(0, num_filenames - 1)
        idx2 = random.randint(0, num_filenames - 1)

        image1 = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filenames[idx1]))[:,:,::-1]
        image2 = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filenames[idx2]))[:,:,::-1]
        bboxes1 = np.array(object_details[filenames[idx1]], dtype=np.float32)
        bboxes2 = np.array(object_details[filenames[idx2]], dtype=np.float32)

        mix_up_image, mix_up_bboxes = mix_up_augmentation([image1, image2], [bboxes1, bboxes2])

        clear_filename1 = filenames[idx1].split(".")[0]
        clear_filename2 = filenames[idx2].split(".")[0]

        # The loop index keeps the output name unique even if a pair repeats.
        _add_new_image(mix_up_image, mix_up_bboxes, "mix_up_{}_{}_{}.jpg".format(idx, clear_filename1, clear_filename2))
    
    # Random mosaic
    for idx in tqdm(range(num_filenames), total=num_filenames, desc="Mosaic"):
        indices = [random.randint(0, num_filenames - 1) for _ in range(4)]

        # The comprehensions below reuse the name `idx`, but comprehension
        # variables are scoped to the comprehension, so the loop counter `idx`
        # is unaffected.
        images = [cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filenames[idx]))[:,:,::-1] for idx in indices]
        bboxes = [np.array(object_details[filenames[idx]], dtype=np.float32) for idx in indices]

        mosaic_image, mosaic_bboxes = mosaic_augmentation(images, bboxes)

        _add_new_image(mosaic_image, mosaic_bboxes, "mosaic_{}_{}.jpg".format(idx, '_'.join([filenames[idx].split(".")[0] for idx in indices])))

100%|██████████| 21314/21314 [01:36<00:00, 221.50it/s]
Mix up: 100%|██████████| 6394/6394 [01:11<00:00, 89.22it/s] 
Mosaic: 100%|██████████| 21314/21314 [06:11<00:00, 57.36it/s]


### Trying another class-balancing method

In [31]:
import numpy as np
from tqdm import tqdm
import cv2
import albumentations as A
import random

In [32]:
# Class names; a class id in the annotation files is an index into this list.
classes_name = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]

In [33]:
DATA_PATH = './data/pascal_voc_training_data.txt'

# filename -> list of class ids of every object annotated in that image
filename_to_class_ids = {}
# class id -> filenames whose rarest class (within that image) is this class
class_to_filenames = [[] for _ in range(len(classes_name))]

# `with` guarantees the handle is closed even if a line fails to parse.
with open(DATA_PATH, "r") as training_data_file:
    for line in training_data_file:
        fields = line.strip().split(" ")
        filename = fields[0]

        # Fields are "<filename> (xmin ymin xmax ymax class_id)*", so the class
        # ids sit at positions 5, 10, 15, ...
        class_ids = [int(float(token)) for token in fields[5::5]]
        filename_to_class_ids[filename] = class_ids

        # An image without objects has no minority class; without this guard the
        # all-inf count vector below would make argmin file it under class 0.
        if not class_ids:
            continue

        class_count_in_this_filename = np.zeros(len(classes_name), dtype=np.float32)
        for class_id in class_ids:
            class_count_in_this_filename[class_id] += 1

        # Absent classes must never win the argmin.
        class_count_in_this_filename[class_count_in_this_filename == 0] = float("inf")

        # Put the filename to the minority class in current file
        minority_class = int(np.argmin(class_count_in_this_filename))
        class_to_filenames[minority_class].append(filename)

In [34]:
# Number of files assigned to each class bucket.
for class_label, bucket in zip(classes_name, class_to_filenames):
    bucket_size = len(bucket)
    print(f"{class_label}: {bucket_size}")

aeroplane: 239
bicycle: 235
bird: 318
boat: 167
bottle: 190
bus: 172
car: 561
cat: 331
chair: 277
cow: 121
diningtable: 187
dog: 359
horse: 262
motorbike: 199
person: 665
pottedplant: 131
sheep: 79
sofa: 138
train: 222
tvmonitor: 121


In [38]:
# Perform something like Class-aware sampling
sampled_class_counts = np.array([0 for _ in range(len(classes_name))])
class_sample_offset = [0 for _ in range(len(classes_name))]
sampled_filenames = []

target = 3000

while True:
    # Find the class with the smallest number of samples
    minority_class_idx = np.argmin(sampled_class_counts)
    majority_class_idx = np.argmax(sampled_class_counts)

    if minority_class_idx in [14]:
        continue

    # Stop condition
    if sampled_class_counts[minority_class_idx] >= target:
        break

    # if sampled_class_counts[majority_class_idx] >= upper_bound_target:
    #     break

    # Sample the filename from the minority class
    while True:
        sampled_filename = class_to_filenames[minority_class_idx][class_sample_offset[minority_class_idx]]
        class_sample_offset[minority_class_idx] = (class_sample_offset[minority_class_idx] + 1) % len(class_to_filenames[minority_class_idx])

        all_are_not_exceed = all([sampled_class_counts[class_id] < target for class_id in filename_to_class_ids[sampled_filename]])

        if all_are_not_exceed:
            break

    # Update the sampled class counts
    for class_id in filename_to_class_ids[sampled_filename]:
        sampled_class_counts[class_id] += 1
    
    # Update the sampled filenames
    sampled_filenames.append(sampled_filename)

# Print the sampled class counts
print("Sampled class counts")
for idx, (name, count) in enumerate(zip(classes_name, sampled_class_counts)):
    print("({:02d}) {:15s}: {:5d}".format(idx, name, count), end='\n' if idx % 5 == 4 else " ")
print()

# Check number of repeat filenames
repeat_file_counts = {}
for filename in sampled_filenames:
    if repeat_file_counts.get(filename, None) is not None:
        repeat_file_counts[filename] += 1
    else:
        repeat_file_counts[filename] = 1

k = 3
top_k_repeat = sorted(repeat_file_counts.items(), key=lambda item: item[1], reverse=True)[:k]

print("Repeat file counts")
for filename, count in top_k_repeat:
    print("{}: {}".format(filename, count))

print()

Sampled class counts
(00) aeroplane      :  3001 (01) bicycle        :  3000 (02) bird           :  3030 (03) boat           :  3001 (04) bottle         :  3002
(05) bus            :  3000 (06) car            :  3001 (07) cat            :  3000 (08) chair          :  3002 (09) cow            :  3009
(10) diningtable    :  3001 (11) dog            :  3000 (12) horse          :  3001 (13) motorbike      :  3000 (14) person         :  3000
(15) pottedplant    :  3000 (16) sheep          :  3014 (17) sofa           :  3000 (18) train          :  3000 (19) tvmonitor      :  3000

Repeat file counts
001834.jpg: 288
005521.jpg: 288
005803.jpg: 287



In [39]:
# Count number of mix-up and mosaic
num_mix_up = sum([1 for item in sampled_filenames if isinstance(item, tuple) and item[0] == 'mix-up'])
num_mosaic = sum([1 for item in sampled_filenames if isinstance(item, tuple) and item[0] == 'mosaic'])

print("Number of mix-up: {}".format(num_mix_up))
print("Number of mosaic: {}".format(num_mosaic))
print("Number of normal: {}".format(len(sampled_filenames) - num_mix_up - num_mosaic))
print("Total number of samples: {}".format(len(sampled_filenames)))

Number of mix-up: 0
Number of mosaic: 0
Number of normal: 30804
Total number of samples: 30804


In [61]:
# Export the class-aware sample (`sampled_filenames`) as a 448x448 dataset.
# `repeat_count` tracks how many times each plain filename has been written so
# that every output name is unique.
repeat_count = {}

with open("./data/data_augmentation_pascal_voc_training_data_3000.txt", "w") as fw:
    def _add_new_image(image, bboxes, result_filename):
        """Randomly transform, resize to 448x448, save, and log one sample.

        The sample is skipped (with an "Ignore file" message) when it has no
        boxes on entry or when no box is left after the transform and resize.
        The channel axis is reversed again before saving, undoing the reversal
        applied on load.
        """
        if bboxes.shape[0] <= 0:
            print("Ignore file: {}".format(result_filename))
            return
    
        image, bboxes = random_transform(image, bboxes)
        image, bboxes = resize_with_bboxes((448, 448), image, bboxes)

        # Normalise to an ndarray so the `.shape` / `.astype` calls below work.
        if not isinstance(bboxes, np.ndarray):
            bboxes = np.array(bboxes)

        if bboxes.shape[0] <= 0:
            print("Ignore file: {}".format(result_filename))
            return

        cv2.imwrite('./data/VOC_Augmentation_train_3000/{}'.format(result_filename), image[:, :, ::-1])

        fw.write("{}".format(result_filename))

        # One "xmin ymin xmax ymax class" group per box, truncated to integers.
        for xmin, ymin, xmax, ymax, class_num in bboxes.astype(int).tolist():
            fw.write(" {} {} {} {} {}".format(xmin, ymin, xmax, ymax, class_num))
        
        fw.write("\n")
    
    # Each entry is either a plain filename string or a tuple whose first
    # element is 'mosaic' or 'mix-up' and whose second element holds the
    # source filenames.
    for idx, filename in tqdm(enumerate(sampled_filenames), total=len(sampled_filenames)):
        if isinstance(filename, str):
            # Check if this image appears before
            if repeat_count.get(filename, None) is None:
                repeat_count[filename] = 0

            # Load the image unchanged; _add_new_image applies the random
            # transform and the resize.
            image = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename))[:,:,::-1]
            bboxes = np.array(object_details[filename], dtype=np.float32)

            _add_new_image(image, bboxes, "rt_or_same_{}_{}".format(repeat_count[filename], filename))

            repeat_count[filename] = repeat_count[filename] + 1

        elif filename[0] == 'mosaic':
            # The four source filenames of the mosaic.
            mosaic_filanames = [filename[1][0], filename[1][1], filename[1][2], filename[1][3]]

            images = [cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(f))[:,:,::-1] for f in mosaic_filanames]
            bboxes = [np.array(object_details[f], dtype=np.float32) for f in mosaic_filanames]

            mosaic_image, mosaic_bboxes = mosaic_augmentation(images, bboxes)

            # The position in sampled_filenames keeps the output name unique.
            _add_new_image(mosaic_image, mosaic_bboxes, "mosaic_{}_{}.jpg".format('_'.join([f.split(".")[0] for f in mosaic_filanames]), idx))
        elif filename[0] == 'mix-up':
            # The two source filenames of the mix-up.
            filename1 = filename[1][0]
            filename2 = filename[1][1]

            image1 = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename1))[:,:,::-1]
            image2 = cv2.imread('./data/VOCdevkit_train/VOC2007/JPEGImages/{}'.format(filename2))[:,:,::-1]
            bboxes1 = np.array(object_details[filename1], dtype=np.float32)
            bboxes2 = np.array(object_details[filename2], dtype=np.float32)

            mix_up_image, mix_up_bboxes = mix_up_augmentation([image1, image2], [bboxes1, bboxes2])

            clear_filename1 = filename1.split(".")[0]
            clear_filename2 = filename2.split(".")[0]

            _add_new_image(mix_up_image, mix_up_bboxes, "mix_up_{}_{}_{}.jpg".format(clear_filename1, clear_filename2, idx))
        else:
            raise Exception("Invalid filename: {}".format(filename))

  2%|▏         | 591/30804 [00:03<02:43, 185.32it/s]

Ignore file: rt_or_same_0_001332.jpg


 35%|███▍      | 10752/30804 [00:59<01:46, 188.81it/s]

Ignore file: rt_or_same_6_007773.jpg


 47%|████▋     | 14416/30804 [01:21<01:38, 166.98it/s]

Ignore file: rt_or_same_9_003083.jpg


 69%|██████▊   | 21172/30804 [02:00<00:52, 184.52it/s]

Ignore file: rt_or_same_23_003754.jpg


100%|██████████| 30804/30804 [02:53<00:00, 177.93it/s]
