In [1]:
# this notebook generates an object detection dataset from the "core material" (backgrounds, objects, obstructions) found in the folders configured below

In [2]:
import os
import random
from PIL import Image
from tqdm import tqdm
import numpy as np
from skimage.transform import swirl
from skimage import img_as_ubyte
import threading

In [3]:
# this is an ULTRA-IMPORTANT list: the position (idx) of a folder name in it is the class id written to the labels
# the class is later used to find the highest priority target (e.g. idx 5 is higher priority than idx 4)
# do NOT reorder or remove entries once a dataset or a trained model depends on these ids
folder_class_priority = ["filth",
                         "stray",
                         "schism",
                         "drone",
                         "soldier",
                         "streetcleaner",
                         "malicious_face",
                         "cerberus",
                         "swordsmachine",
                         "mannequin",
                         "gutterman",
                         "virtue",
                         "stalker",
                         "sentry",
                         "idol",
                         "guttertank",
                         "mindflayer",
                         "insurrectionist",
                         "ferryman",
                         "hideous_mass",
                         "red_sphere",
                         "blue_sphere"]

In [4]:
# material to create dataset with
# (paths are built with os.path.join so they resolve on every OS; on windows the values are unchanged)
backgrounds_path = os.path.join(".", "dataset_materials", "backgrounds")
objects_path = os.path.join(".", "dataset_materials", "objects")
obstructions_path = os.path.join(".", "dataset_materials", "obstructions")

# where to output the dataset (will automatically create folder structure for you)
train_images_folder = os.path.join(".", "auto_generated_dataset", "train_data", "images")
train_labels_folder = os.path.join(".", "auto_generated_dataset", "train_data", "labels")

val_images_folder = os.path.join(".", "auto_generated_dataset", "val_data", "images")
val_labels_folder = os.path.join(".", "auto_generated_dataset", "val_data", "labels")

# size and train / val split
num_images_to_generate = 12000
train_split = 0.8
val_split = 1 - train_split
num_workers = 6 # adjust for your cpu
# the cells that build the threads add one extra worker for train and one for val ("1 + ..."),
# presumably that is why two are taken off here
num_workers -= 2

In [5]:
# the following code is one of the worst creations humanity has yet seen in this programming language.
# it was written as a one-time-use program that i kept upgrading without properly restructuring.
# if anybody is brave enough to go into these functions and fix everything, you are welcome to.

In [6]:
def write_label(path, classes, bnd_boxes):
    """
    writes a text file with the label information in the YOLO format
    (the original author notes this mirrors what labelimg produces).

    every (class, bounding box) pair becomes one line: the class, then each
    coordinate printed with 6 decimals. every field, including the last one,
    is followed by a single space before the newline.

    path      -- text file to create / overwrite
    classes   -- class ids, paired with bnd_boxes through zip (extra items on either side are ignored)
    bnd_boxes -- one sequence of numeric coordinates per class id
    """
    with open(path, "w") as label_file:
        # the generator is consumed inside the "with", so the file is opened before any row is formatted
        label_file.writelines(
            f"{class_id} " + "".join(f"{value:.6f} " for value in box) + "\n"
            for class_id, box in zip(classes, bnd_boxes)
        )

In [7]:
def warp_img(img):
    """
    applies a random swirl effect to the image, used on the enemy images to give more variety.

    rotation is drawn from -3/16 .. 3/16 and strength from -0.6 .. 0.6 (both in steps of the
    underlying integer draw); the swirl radius is img.size[1].
    returns a new PIL image built from the swirled pixels converted back to unsigned bytes.
    """
    pixels = np.array(img)

    # rotation is drawn before strength, same order in which the random numbers were always consumed
    swirl_rotation = random.randint(-3, 3) / 16
    swirl_strength = random.randint(-30, 30) / 50

    swirled_pixels = swirl(pixels, rotation=swirl_rotation, strength=swirl_strength, radius=img.size[1])

    return Image.fromarray(img_as_ubyte(swirled_pixels))

In [8]:
def overlay_random_color(original_img, max_intensity=0.1):
    """
    tints the whole image with one random color.

    max_intensity controls how opaque the tint can get: the alpha of the tint is drawn
    between 0 and int(255 * max_intensity).
    returns a new RGBA image, the image passed in is not modified.
    """
    # the four channels are drawn in R, G, B, A order
    red = random.randint(0, 255)
    green = random.randint(0, 255)
    blue = random.randint(0, 255)
    alpha = random.randint(0, int(255 * max_intensity))

    # a single colored pixel is enough, it gets stretched over the full image size below
    tint_pixel = Image.new('RGBA', (1, 1), (red, green, blue, alpha))

    base_layer = original_img.convert('RGBA')
    tint_layer = tint_pixel.resize(original_img.size)

    return Image.alpha_composite(base_layer, tint_layer)

In [9]:
def normalize_img_size(original_img, long_side_size=640):
    """
    resizes the image so that its long side is exactly long_side_size pixels (640 by default),
    scaling the short side by the same factor (truncated to whole pixels) to keep the aspect ratio.

    this is used because in the dataset materials the enemy images have different sizes, so the
    network wont get used to certain classes being naturally smaller than others.

    original_img   -- image exposing .size as (width, height) and .resize((width, height))
    long_side_size -- target length of the longer side in pixels

    returns whatever original_img.resize returns (a new image for PIL images).
    """
    width, height = original_img.size

    if width >= height:
        new_width = long_side_size
        new_height = int(height * (long_side_size / width))
    else:
        new_width = int(width * (long_side_size / height))
        new_height = long_side_size

    # an extremely elongated image would truncate its short side to 0px, which resize rejects,
    # so both sides are kept at 1px or more
    return original_img.resize((max(new_width, 1), max(new_height, 1)))

In [10]:
def paste_random_position(background_img, new_object_img, scale_variation=(0.1, 0.5), rotation_variation=(-25, 25)):
    """
    randomly warps, resizes and rotates an image, pastes it at a random position on the background
    and reports where it ended up.

    background_img     -- PIL image, modified IN PLACE by the paste
    new_object_img     -- PIL image to paste; its alpha channel is used as the paste mask
    scale_variation    -- (min, max) factors applied to the width of new_object_img to pick the pasted width
    rotation_variation -- (min, max) rotation in whole degrees (handed to random.randint)

    returns (center_x, center_y, width, height) of the pasted rectangle, each divided by the
    background size (the "center + width/height" format). the rectangle is the whole rotated
    canvas, clipped to the background when the pasted image is larger than the background.
    """
    # the image is used as its own paste mask further down, which needs an alpha channel
    if new_object_img.mode != "RGBA":
        new_object_img = new_object_img.convert("RGBA")

    # the scale range is relative to the width the image had before any transformation
    original_width = new_object_img.size[0]

    # apply transformations
    new_object_img = warp_img(new_object_img)

    # note that the height is the chosen width times a random factor between 0.5 and 1.5,
    # so the image might be squished or stretched
    chosen_width = random.randint(int(original_width * scale_variation[0]), int(original_width * scale_variation[1]))
    chosen_width = max(chosen_width, 1) # very small inputs would otherwise be resized to 0px
    chosen_height = max(int(chosen_width * (random.randint(5, 15) / 10)), 1)
    new_object_img = new_object_img.resize((chosen_width, chosen_height))

    new_object_img = new_object_img.rotate(random.randint(*rotation_variation), expand=True)

    # measure the size only now because the rotation changes it when expand==True
    # (if its ==False then the image would be cropped)
    background_width, background_height = background_img.size
    object_width, object_height = new_object_img.size

    # !!!THIS IS THE PART THAT DECIDES WHERE TO PASTE THE IMAGE!!!
    # an image that does not fit is pasted at 0 on that axis and hangs over the right / bottom edge
    max_start_x = max(background_width - object_width, 0)
    max_start_y = max(background_height - object_height, 0)

    paste_start_x = random.randint(0, max_start_x)
    paste_start_y = random.randint(0, max_start_y)

    # only the part that lands on the background is labelled, so the normalized box never exceeds 1
    paste_end_x = min(paste_start_x + object_width, background_width)
    paste_end_y = min(paste_start_y + object_height, background_height)

    # finally paste the image over the background
    background_img.paste(new_object_img, (paste_start_x, paste_start_y), new_object_img)

    # convert from "corners" format to "center + width/height" format (also normalize)
    bnd_box_center_x = (paste_start_x + paste_end_x) / 2 / background_width
    bnd_box_center_y = (paste_start_y + paste_end_y) / 2 / background_height
    bnd_box_width = (paste_end_x - paste_start_x) / background_width
    bnd_box_height = (paste_end_y - paste_start_y) / background_height

    return bnd_box_center_x, bnd_box_center_y, bnd_box_width, bnd_box_height

In [11]:
def add_obstruction(original_img, obstruction_img):
    """
    overlays an image over another image, useful for overlaying an obstruction over an enemy image.

    the obstruction is stretched to the exact size of original_img and pasted at the top-left
    corner, using itself as the paste mask.

    returns a modified copy, original_img itself is left untouched.
    """
    # resize once and reuse the result as both the pasted pixels and the mask
    fitted_obstruction = obstruction_img.resize(original_img.size)

    result_img = original_img.copy()
    result_img.paste(fitted_obstruction, (0, 0), fitted_obstruction)
    return result_img

In [12]:
# takes in the "materials" and uses them to make a randomly generated image along with its labels
# backgrounds_path         =   folder with the background images, one is picked at random
# enemies_path             =   folder with one sub-folder per class, every sub-folder name must be in folder_class_priority
# obstructions_path        =   folder with the obstruction images
# num_of_enemies           =   (min, max) to randomly choose how many enemies to paste on the background
# max_obstructions         =   at most this many obstructions are overlaid on EACH enemy
# max_false_obstructions   =   at most this many obstructions are pasted directly on the background (they get no label)
# scale and rotation of everything that gets pasted come from the defaults of paste_random_position
def generate_image(backgrounds_path, enemies_path, obstructions_path, num_of_enemies = (0, 10), max_obstructions=2, max_false_obstructions=10):
    """
    applies all of the functions above to return a finished product.

    returns (image, bnd_boxes, classes):
      image     -- the background with everything pasted on it and a random color tint applied
      bnd_boxes -- one [center_x, center_y, width, height] list per pasted enemy, as returned by paste_random_position
      classes   -- the class id of every pasted enemy (index of its folder name in folder_class_priority), same order as bnd_boxes

    raises ValueError when a sub-folder of enemies_path is not listed in folder_class_priority.
    """
    # define the "answer" lists that will be used to train the net
    enemy_bnd_boxes = []
    enemy_classes = []

    # list every material folder once per generated image
    background_files = os.listdir(backgrounds_path)
    obstruction_files = os.listdir(obstructions_path)
    enemy_folders = os.listdir(enemies_path)

    # load a random background (this image will be used as the foundation to paste other images on)
    background_image = Image.open(os.path.join(backgrounds_path, random.choice(background_files)))

    chosen_num_of_enemies = random.randint(*num_of_enemies)

    # add random obstructions to the background, their position is not recorded
    for _ in range(random.randint(0, max_false_obstructions)):
        false_obstruction = Image.open(os.path.join(obstructions_path, random.choice(obstruction_files)))
        paste_random_position(background_image, false_obstruction)

    # add the enemies
    for _ in range(chosen_num_of_enemies):
        # choose what enemy image we will be overlaying now
        chosen_folder_name = random.choice(enemy_folders)
        chosen_enemy_class = folder_class_priority.index(chosen_folder_name)
        chosen_folder = os.path.join(enemies_path, chosen_folder_name)
        chosen_file = os.path.join(chosen_folder, random.choice(os.listdir(chosen_folder)))

        # load image that will be pasted
        current_enemy_image = Image.open(chosen_file)

        # ensure consistent sizing across images with different resolutions
        current_enemy_image = normalize_img_size(current_enemy_image)

        # add a random number of random obstructions to each enemy
        for _ in range(random.randint(0, max_obstructions)):
            enemy_obstruction = Image.open(os.path.join(obstructions_path, random.choice(obstruction_files)))
            current_enemy_image = add_obstruction(current_enemy_image, enemy_obstruction)

        bnd_box_center_x, bnd_box_center_y, bnd_box_width, bnd_box_height = paste_random_position(background_image, current_enemy_image)

        enemy_bnd_boxes.append([bnd_box_center_x, bnd_box_center_y, bnd_box_width, bnd_box_height])
        enemy_classes.append(chosen_enemy_class)

    background_image = overlay_random_color(background_image)

    return background_image, enemy_bnd_boxes, enemy_classes

In [13]:
# "main" function that generates a dataset from "materials" given to it
def generate_dataset(backgrounds_path, objects_path, images_path, labels_path, dataset_size, start_idx):
    """
    generates dataset_size images with generate_image and saves each one together with its label file.

    the files are numbered start_idx, start_idx + 1, ... and are always named
    "auto_generated_train_<number>" (.png in images_path, .txt in labels_path).

    note: the obstructions folder is NOT a parameter, it is read from the module level
    variable obstructions_path.
    """
    for sample_idx in range(start_idx, start_idx + dataset_size):
        sample_name = f"auto_generated_train_{sample_idx}"
        image_file = os.path.join(images_path, sample_name + ".png")
        label_file = os.path.join(labels_path, sample_name + ".txt")

        sample_image, sample_boxes, sample_classes = generate_image(backgrounds_path, objects_path, obstructions_path)

        sample_image.save(image_file, "png")
        write_label(label_file, sample_classes, sample_boxes)

In [14]:
def create_workers(backgrounds_path, objects_path, images_path, labels_path, dataset_size, num_workers):
    """
    this is to speed up the process: the work of generate_dataset is split over several threads.

    dataset_size images are divided between num_workers threads. when the division leaves a
    remainder, the first threads take one extra image each, so that exactly dataset_size images
    are generated. every thread gets its own block of consecutive file numbers, starting at 0.

    returns the list of threading.Thread objects, NOT started yet (call .start() on each).
    raises ValueError when num_workers is smaller than 1.
    """
    if num_workers < 1:
        raise ValueError(f"num_workers must be at least 1, got {num_workers}")

    base_tasks, leftover_tasks = divmod(int(dataset_size), num_workers)

    worker_threads = []
    next_start_idx = 0

    for current_worker in range(num_workers):
        # the first "leftover_tasks" workers pick up the images a plain division would drop
        individual_tasks = base_tasks + (1 if current_worker < leftover_tasks else 0)

        worker_thread = threading.Thread(target=generate_dataset, args=(backgrounds_path, objects_path, images_path, labels_path, individual_tasks, next_start_idx))
        worker_threads.append(worker_thread)

        next_start_idx += individual_tasks

    return worker_threads

In [15]:
def check_files():
    """
    creates the output folder structure (train / val, images / labels) if it is missing, then
    prints whether the three material folders (backgrounds, objects, obstructions) exist.

    only reports the result with print, nothing is returned or raised when materials are missing.
    """
    # make sure that the output folder structure is actually there
    output_folders = (train_images_folder, val_images_folder, train_labels_folder, val_labels_folder)
    for output_folder in output_folders:
        os.makedirs(output_folder, exist_ok=True)

    material_folders = (backgrounds_path, objects_path, obstructions_path)
    if all(os.path.exists(material_folder) for material_folder in material_folders):
        print("everything seems to be set up. check_files() is paying for you before you start the threads 🙏")
    else:
        print("the thing does not exist. please put the materials thing into the same directory as this file.")

In [16]:
# create the output folders and report whether the material folders exist, before any thread is created
check_files()

everything seems to be set up. check_files() is paying for you before you start the threads 🙏


In [17]:
# build (but do not start yet) the threads that will generate the train part of the dataset
train_dataset_size = int(num_images_to_generate * train_split)
train_worker_count = 1 + int(train_split * num_workers) # the "1 +" guarantees at least one worker
train_threads = create_workers(backgrounds_path, objects_path, train_images_folder, train_labels_folder,
                               train_dataset_size, train_worker_count)

In [18]:
# build (but do not start yet) the threads that will generate the val part of the dataset
# the val part is "everything the train part did not take": multiplying by val_split directly
# loses an image to float rounding (int(12000 * (1 - 0.8)) is 2399, not 2400)
val_dataset_size = num_images_to_generate - int(num_images_to_generate * train_split)
val_threads = create_workers(backgrounds_path, objects_path, val_images_folder, val_labels_folder,
                             val_dataset_size, # dataset size
                             1 + int(val_split * num_workers)) # workers for this part

In [19]:
# start every worker thread; the loops only launch the threads and do not wait for them to finish
for train_thread in tqdm(train_threads):
    train_thread.start()

print("started train dataset generation threads succesfully")

for val_thread in tqdm(val_threads):
    val_thread.start()

print("started val dataset generation threads succesfully")

# nothing waits for (joins) the threads, hence the manual instructions printed here
print("you will know when the threads finish working when CPU utilization goes back to normal (look at task manager)")
print("if you want to stop the threads prematurly then either restart runtime, or kill all python processes also from task manager")

100%|██████████| 4/4 [00:00<00:00, 3999.34it/s]


started train dataset generation threads succesfully


100%|██████████| 1/1 [00:00<?, ?it/s]

started val dataset generation threads succesfully
you will know when the threads finish working when CPU utilization goes back to normal (look at task manager)
if you want to stop the threads prematurly then either restart runtime, or kill all python processes also from task manager





In [20]:
# after all of the above has finished running, clone the YOLOv5 repository and configure everything following this tutorial:
# https://www.youtube.com/watch?v=tFNJGim3FXw&t=3516s
# this is the training command that worked fine on an RTX 2060 with 6GB of VRAM, adjust --batch for your GPU
# (the expression below only joins the lines into one single-line command that is shown as the cell output)

r"""
python train.py
--img 1024
--batch 5
--data dataset.yaml
--workers 6
--weights [PATH/TO/YOLOV5/REPO]\yolov5\yolov5m.pt
--epochs 80
""".replace("\n", " ")[1:-1]

'python train.py --img 1024 --batch 5 --data dataset.yaml --workers 6 --weights [PATH/TO/YOLOV5/REPO]\\yolov5\\yolov5m.pt --epochs 80'

: 